figs <- list.files("./hybrids_cut_plots", pattern=".svg")
walk(figs, ~system(paste0("inkscape --export-dpi 150 -o ./hybrids_cut_plots/png/", str_replace(.x, ".svg",".png"), " ./hybrids_cut_plots/", .x)))
walk(figs, ~system(paste0("inkscape -o ./hybrids_cut_plots/eps/", str_replace(.x, ".svg",".eps"), " ./hybrids_cut_plots/", .x)))
greenhouse <- matrix(c(-117.8475746, 33.6472914), nrow=1) #latlong of UCI greenhouse
s <- read_tsv("data/UCI_GCMS/Schiedea Volatiles Sampling - Samples.tsv") %>%
drop_na(Start, Stop) %>% #2 bad samples with no Start, 1 missing Stop in 2019
nest(.by=SampleDate) %>% #sunset function is slow, run on unique dates
mutate(sunset = map_vec(SampleDate,
~maptools::sunriset(dateTime=ymd_hms(paste(.x, "13:00:00")),
direction="sunset", crds=greenhouse, POSIXct.out=T)$time),
solarnoon = map_vec(SampleDate,
~maptools::solarnoon(dateTime=ymd_hms(paste(.x, "13:00:00")),
crds=greenhouse, POSIXct.out=T)$time)) %>% unnest(data) %>%
mutate(StartSunset = difftime(ymd_hms(paste(SampleDate, Start)), sunset, units="hours"),
StopSunset = difftime(ymd_hms(paste(SampleDate, Stop)), sunset, units="hours"),
StartNoon = difftime(ymd_hms(paste(SampleDate, Start)), solarnoon, units="hours"))
CASsubs <- read_tsv("data/UCI_GCMS/CASsubs3.csv") %>% select(CAS, newCAS) %>% deframe()
data <- read_tsv("data/UCI_GCMS/svhyb_ambient_blanks.csv") %>%
distinct() %>% #generated some reports twice
select(FileName:`Base Peak`, Weighted, Reverse) %>%
rename(Path = FileName, Fraction = Amount, Match = Weighted) %>%
mutate(Path = str_sub(Path, 52, -5), #remove base directory and file extension
SampleDate = str_extract(Path, "\\\\(.*)", group=1), #file name after folder
Sample = if_else(str_sub(SampleDate, 1, 5)=="BLANK", SampleDate,
str_extract(SampleDate, "(.*?)_.*", group=1) %>% #drop GC index
#pad sample number to 3 digits with zeros
str_replace("(\\d+)", function(x) formatC(as.integer(x), width=3, format="d", flag="0")) %>%
str_replace("^KAHO", "SKAHO")),
CAS = str_replace(CAS, "(\\d{2,7})(\\d{2})(\\d{1})$","\\1-\\2-\\3") %>% #add dashes
recode(!!!CASsubs),#manual identifications
Name = str_remove(Name, "^>"),
across(c(SampleDate, Path, Model, Sample, Name, CAS), factor),
across(c(Fraction, Purity, `Min. Abund.`), ~ as.numeric(str_remove(.x, "%"))/100)) %>%
#AMDIS Base Peak only has the most common ion integrated
#instead, multiply fractions by total peak areas from sum_chromatograms.R
left_join(read_tsv("data/UCI_GCMS/svhyb_TICsums.csv") %>%
mutate(SampleDate = str_extract(file, "(.*).CDF", group=1))) %>%
mutate(Area = Fraction * sumTIC)
chems.pc <- read_tsv("data/UCI_GCMS/chemspc3.csv") %>% select(CAS, IUPAC.Name) %>% mutate(IUPAC.Name = str_replace(IUPAC.Name, "~\\{(.*?)\\}", "\\1"))
Filtering
#Load filtered scent data (includes octenol and hexenol) and metadata
# load("./data/UCI_GCMS/sdata3_hexoct.Rdata")
# TODO some red flags in the Notes column - possible wrong IDs
# sv$spec <- spec
# save(sv, vol, file = "./data/UCI_GCMS/sdata3_hexoct_svvol.Rdata")
load("./data/UCI_GCMS/sdata3_hexoct_svvol.Rdata")
#convert to nanograms per hour using standards for each compound class
vocs <- colnames(vol)
chemsf <- tibble(shortname = vocs) %>%# two VOCs got renamed since table was made
bind_cols(read_tsv("data/UCI_GCMS/chemspc3_classes.csv")) %>%
left_join(read_tsv("data/UCI_GCMS/multipliers.csv"))
vol <- sweep(vol, 2, chemsf$ngPerArea, FUN= `*`)
Sample inventory
eve <- -2.5 #hours before sunset
hybcolr <- c(`S. kaalae` = "#31A354", `K x H` = "#FD8D3C", `H x K` = "#A65628", `S. hookeri` = "#756BB1")
hyblabs <- c(expression(italic("S. kaalae")),"K x H", "H x K", expression(italic("S. hookeri")))
renamepops <- c(`879WKG` = "879", WK = "794", WK = "866", WK = "891", WK = "899", #lump Waianae Kai populations
`3587WP` = "3587", `892WKG` = "892", `904WPG` = "904")
kahopops <- unique(names(renamepops))
hybpopsp <- c(K = "3587WP", H = "879WKG", K = "881KHV", K = "892WKG", K = "904WPG",
HH = "HH", HK = "HK", KH = "KH", KK = "KK", H = "WK")
#Get maternal and paternal populations of crosses
kahomp <- sv %>% rownames_to_column() %>% filter(Species == "kaho") %>% select(rowname, Plant) %>%
separate(Plant, into = c("Maternal", "Paternal"), sep=" x ") %>%
mutate(across(ends_with("aternal"), ~str_split_i(.x, "-", 1) %>%
fct_recode(!!!renamepops) %>% fct_relevel(kahopops)))
#Add columns to metadata describing the plant
svhyb <- sv %>% rownames_to_column() %>% left_join(kahomp) %>%
mutate(specp = case_match(Population, "KK" ~ "KAAL", "HH" ~ "HOOK", .default=spec) %>%
fct_relevel(c("HOOK","KAHO","KAAL")), #Split cross offspring (spec=KAHO) into parents and hybrids
Population2 = factor(if_else(Species == "kaho", if_else(Maternal==Paternal, Maternal, "Interpop"), Population)),
SpeciesR = if_else(specp=="KAHO", Population, specp) %>%
factor(levels=c("KAAL","KH","HK","HOOK")) %>%
fct_recode("S. kaalae"="KAAL", "K x H"="KH", "H x K"="HK", "S. hookeri"="HOOK"),
DN = factor(if_else(StartSunset > eve, "Night", "Day")),
Inflo = if_else(is.na(Inflo), as.character(1:n()), Inflo),
Diurnal = if_else(is.na(Diurnal), as.character(1:n()), Diurnal),
plants = paste(Population, Plant, sep="-"),
plantsi = paste(Population, Plant, Cutting, sep="-"),
plantsinflo = paste(Population, Plant, Cutting, Inflo, sep="-"),
plantsinflodate = paste(Population, Plant, Cutting, Inflo, SampleDate, sep="-"),
parents = if_else(str_detect(plants, fixed(" x ")), as.character(1:n()), plants),
ftot = rowSums(vol) / Flrs) %>%
arrange(specp, Population)
svhybn <- filter(svhyb, DN == "Night")
#"parents" set the crosses to an integer sequence to force them not to group
#TODO this unfortunately splits resamples of the sample cross plant into multiple "parents"
#TODO also, NMDS below implies these are different cross parents but they are really just the original plant in the parent generation that was used for cuttings
volhybn <- vol[svhybn$rowname,] #emission rate, nanograms per hour
pavolhybn <- decostand(volhybn, method = "pa") #presence/absence
fvolhybn <- volhybn / svhybn$Flrs #emission rate per flower
#first take the average for each infloresence
#Inflos are marked by a letter but those are reused arbitrarily across SampleDates
svhybn.inflo <- bind_cols(svhybn, fvolhybn) %>%
group_by(SpeciesR, Species, Population, Maternal, Paternal, plants, plantsi, Inflo, SampleDate) %>%
summarize(across(all_of(vocs), mean), .groups = "drop")
fvolhybn.inflo <- svhybn.inflo[,vocs]
#now average infloresences by plant
#TODO temporarily got rid of parents since it was splitting multiple inflos on same plant
svhybn.plants <- svhybn.inflo %>%
group_by(SpeciesR, Species, Population, Maternal, Paternal, plants, plantsi) %>%
summarize(across(all_of(vocs), mean), .groups = "drop")
fvolhybn.plants <- svhybn.plants[,vocs]
Samples by cross
There are also four 881KHV samples (not used to make crosses since
from the Ko’olaus), and two samples without populations (an HH and a
KH).
inventory.cross <- svhybn %>% filter(Species == "kaho") %>%
drop_na(Maternal, Paternal) %>% #two samples where parent populations are unknown
with(table(Maternal, Paternal))
inventory.all <- inventory.cross + diag(table(svhybn$Population)[rownames(inventory.cross)])
kable(inventory.all, caption="evening samples of crosses and parents")
evening samples of crosses and parents
879WKG |
10 |
2 |
8 |
1 |
3 |
WK |
5 |
18 |
4 |
6 |
6 |
3587WP |
3 |
3 |
9 |
0 |
6 |
892WKG |
0 |
0 |
4 |
5 |
1 |
904WPG |
4 |
2 |
2 |
1 |
6 |
#how many maternal and paternal parent plants?
mp <- svhybn %>% filter(Species =="kaho") %>% select(Plant) %>% mutate(Plant=as.character(Plant)) %>%
separate(Plant, sep=" x ", into = c("Maternal", "Paternal")) %>%
separate(Maternal, sep="-", into = c("MomPop", "MomPlant"), remove=F, extra="merge") %>%
separate(Paternal, sep="-", into = c("DadPop", "DadPlant"), remove=F, extra="merge")
mp %>% group_by(MomPop) %>% summarize(moms=n_distinct(MomPlant)) %>% kable(caption="maternal plants")
maternal plants
3587 |
5 |
794 |
2 |
866 |
1 |
879 |
4 |
892 |
3 |
899 |
1 |
904 |
3 |
NA |
1 |
mp %>% group_by(DadPop) %>% summarize(moms=n_distinct(DadPlant)) %>% kable(caption="paternal plants")
paternal plants
3587 |
4 |
794 |
2 |
866 |
2 |
879 |
4 |
892 |
3 |
899 |
1 |
904 |
3 |
NA |
1 |
Resampled plants
svhybn.inflo %>% count(SpeciesR, plantsi) %>% filter(n>1) %>% kable(caption = "number of infloresences per plant")
number of infloresences per plant
S. kaalae |
881KHV-2-Self 8 |
2 |
S. kaalae |
892WKG-3-NA |
2 |
S. kaalae |
KK-3587-15 x 904-3-218-4 |
3 |
S. kaalae |
KK-3587-15 x 904-3-218-5 |
2 |
S. kaalae |
KK-892-1 x 3587-7-120-1 |
2 |
K x H |
KH-3587-7 x 879-10-1-189-3 |
2 |
H x K |
HK-794-4 x 892-3-81-5 |
2 |
H x K |
HK-794-4 x 904-5-85-4 |
2 |
H x K |
HK-879-10-1 x 3587-C-30-2 |
4 |
S. hookeri |
HH-866-2 x 879-2-2-89-6 |
2 |
S. hookeri |
WK-2-Clone 1 |
2 |
S. hookeri |
WK-3-11– |
2 |
S. hookeri |
WK-Loc 2-Clone 2 |
2 |
Evening plants by cross
remember to add the 3 881KHV plants (not used to make crosses since
from the Ko’olaus)
there are also two plants from crosses without populations (HH and
KH) - added these to 879x879 and 892xWK in the inventory
plants.cross <- svhybn.plants %>% filter(Species == "kaho") %>%
drop_na(Maternal, Paternal) %>% #two samples where parent populations are unknown
with(table(Maternal, Paternal))
kable(plants.cross, caption="cross plants")
cross plants
879WKG |
9 |
2 |
3 |
1 |
3 |
WK |
4 |
6 |
4 |
5 |
5 |
3587WP |
2 |
3 |
5 |
0 |
3 |
892WKG |
0 |
0 |
3 |
3 |
1 |
904WPG |
4 |
2 |
2 |
1 |
4 |
plants.parent <- diag(table(svhybn.plants$Population)[rownames(plants.cross)])
rownames(plants.parent) <- rownames(plants.cross); colnames(plants.parent) <- colnames(plants.cross)
kable(plants.parent, caption="parent plants")
parent plants
879WKG |
1 |
0 |
0 |
0 |
0 |
WK |
0 |
9 |
0 |
0 |
0 |
3587WP |
0 |
0 |
4 |
0 |
0 |
892WKG |
0 |
0 |
0 |
1 |
0 |
904WPG |
0 |
0 |
0 |
0 |
2 |
Sampling times
print("night pumping times, hrs:")
[1] "night pumping times, hrs:"
summary(as.numeric(as.difftime(format.POSIXct(svhybn$SampleDateStart, tz="GMT+8", format="%H:%M"),format="%H:%M"))) #convert PDT to PST when needed
Min. 1st Qu. Median Mean 3rd Qu. Max.
16.40 18.77 19.45 19.30 20.02 21.27
print("night pumping times, hr since sunset:")
[1] "night pumping times, hr since sunset:"
summary(as.numeric(svhybn$StartSunset))
Min. 1st Qu. Median Mean 3rd Qu. Max.
-2.434 1.196 1.895 1.702 2.482 4.545
Heatmap
class_subs <- c("Irregular terpene"="Other terpenoid", "Monoterpene"="Monoterpenoid")
classcolr <- c(Aliphatic="#BC0060",Benzenoid="#027B8C",Monoterpenoid="#5d9400",`Other terpenoid`="#E56E00", Sesquiterpene="#000000")
#1 dimensional NMDS for ordering samples in heatmap
set.seed(1)
mdshybn1 <- metaMDS(decostand(fvolhybn.plants, method="hellinger"),k=1,try=200, autotransform=F, trace=0)
srt <- order(svhybn.plants$SpeciesR, -mdshybn1$points, decreasing=T)
#Include volatiles found in at least this many samples
mincount <- 15
callback = function(hc, mat){
sv = svd(t(mat))$v[,1]
dend = reorder(as.dendrogram(hc), wts = sv)
as.hclust(dend)
}
heat.mat <- as.matrix(t(fvolhybn.plants[,colSums(pavolhybn)>mincount][srt,]))^(1/3)
colnames(heat.mat) <- rownames(fvolhybn.plants)[srt]
ph <- pheatmap(heat.mat,
cluster_cols=F, show_colnames=F,
clustering_method="mcquitty", clustering_distance_rows="correlation",
clustering_callback = function(hc, ...){dendsort(hc, type="average")},
scale="none", color=inferno(512),
annotation_col = data.frame(Species=as.integer(svhybn.plants$SpeciesR)[srt], row.names=rownames(fvolhybn.plants)[srt]),
annotation_colors = list(Species=hybcolr, Class=classcolr), annotation_names_col = F,
gaps_col = which(as.logical(diff(as.integer(svhybn.plants$SpeciesR)[srt]))),
annotation_row = data.frame(Class=chemsf$Class %>% str_remove("s$") %>% recode(!!!class_subs) %>% factor(),
row.names = chemsf$shortname), annotation_names_row = F,
cellwidth = 3.5, cellheight = 14, fontsize = 10, border_color = NA, legend=F, legend_breaks=NA, annotation_legend=F, cutree_rows=5
)
downViewport("col_annotation.3-3-3-3")
middles <- enframe(svhybn.plants$SpeciesR[srt]) %>% mutate(name=(name-0.5)/max(name-0.5)) %>% summarize(name=mean(name), .by=value) %>% pull(name)
grid.text(rev(levels(svhyb$SpeciesR))[c(1,4)], x=middles[c(1,4)],y=0.57, gp=gpar(fontsize=10, col="white", fontface=4))
grid.text(rev(levels(svhyb$SpeciesR))[c(2,3)], x=middles[c(2,3)],y=0.57, gp=gpar(fontsize=10, col="white", fontface=2))

Boxplots of each volatile
diversity_vars <- c(ftot = "Total volatile emissions (ng/flr/hr)",
num_compounds="Number of compounds",
shannon="Shannon diversity index")
svhybn.plants$ftot <- rowSums(fvolhybn.plants)
svhybn.plants$num_compounds <- rowSums(decostand(fvolhybn.plants, method="pa"))
svhybn.plants$shannon <- diversity(fvolhybn.plants, index="shannon")
multiline <- c("DMNT"="(3E)-4,8-dimethylnona-1,3,7-triene",
"2,2,6-trimethylcyclohexane-\n1,4-dione"="2,2,6-trimethylcyclohexane-1,4-dione",
"4-hydroxy-2,6,6-trimethyl-\n3-oxocyclohexene-\n1-carbaldehyde"=
"4-hydroxy-2,6,6-trimethyl-3-oxocyclohexene-1-carbaldehyde")
minprop <- 0.25 #compound must occur in greater than this proportion of evening samples
fvolhybn.top <- fvolhybn.plants %>%
select(where(~sum(.x>0) > length(.x) * minprop)) %>%
bind_cols(select(svhybn.plants, Species=SpeciesR, all_of(names(diversity_vars)))) %>%
pivot_longer(!Species, names_to="variable") %>%
mutate(variable = fct_recode(variable, !!!multiline))
#compare mean of hybrid cross directions to mean of parent species
#compare hybrid cross directions to each other
tests.glht <- fvolhybn.top %>% group_by(variable) %>% nest() %>%
mutate(model = map(data, ~lm(value ~ Species, data=.x)),
emm = map(model, ~tidy(summary(emmeans(.x, specs="Species")))),
test.direction = map(model, ~tidy(multcomp::glht(.x, linfct = multcomp::mcp(Species=c("`K x H` - `H x K` == 0"))))),
test.parmean = map(model, ~tidy(multcomp::glht(.x, linfct = multcomp::mcp(Species=c("0.5 * `K x H` + 0.5 * `H x K` - 0.5 * `S. kaalae` - 0.5 * `S. hookeri`== 0"))))))
tests.emm <- tests.glht %>% mutate(maximum = map_dbl(data, ~max(.x$value))) %>%
select(variable, emm, maximum) %>% unnest(emm)
tests.index <- tests.emm %>%
select(variable, Species, maximum, estimate) %>% pivot_wider(names_from=Species, values_from=estimate) %>%
mutate(par.mean = (`S. hookeri` + `S. kaalae`)/2,
hyb.mean = (`K x H`+`H x K`)/2,
hyb.index = if_else(`S. kaalae`> `S. hookeri`,
(hyb.mean -`S. hookeri`)/ `S. kaalae`,
(hyb.mean -`S. kaalae`)/ `S. hookeri`))
tests.direction <- tests.glht %>% select(variable, test.direction) %>% unnest(test.direction) %>%
select(variable, direction.p.value = adj.p.value)
tests.parmean <- tests.glht %>% select(variable, test.parmean) %>% unnest(test.parmean) %>%
select(variable, parmean.p.value = adj.p.value)
boxplot.order <- tests.index %>% arrange(-hyb.index) %>% pull(variable)
tests.all <- tests.index %>% left_join(tests.direction) %>% left_join(tests.parmean) %>%
mutate(variable=factor(variable, levels=boxplot.order))
tests.vols <- tests.all %>% filter(!variable %in% names(diversity_vars)) %>%
mutate(across(contains("p.value"), ~p.adjust(.x, method="fdr")))
tests.vols %>% select(-c(maximum, hyb.index)) %>% mutate(variable=str_remove_all(variable, "\n")) %>%
kable(caption = "tests of total emissions and diversity", digits=3)
tests of total emissions and diversity
benzaldehyde |
0.079 |
0.033 |
0.041 |
0.311 |
0.195 |
0.037 |
0.915 |
0.001 |
octan-3-one |
0.454 |
0.839 |
1.183 |
0.709 |
0.581 |
1.011 |
0.398 |
0.084 |
4-oxoisophorone |
0.132 |
0.003 |
0.009 |
0.000 |
0.066 |
0.006 |
0.884 |
0.017 |
indole |
0.002 |
0.031 |
0.016 |
0.137 |
0.070 |
0.024 |
0.634 |
0.021 |
2-phenylacetaldehyde |
0.297 |
0.006 |
0.004 |
0.006 |
0.151 |
0.005 |
0.994 |
0.208 |
methyl 2-aminobenzoate |
0.000 |
0.020 |
0.005 |
0.086 |
0.043 |
0.012 |
0.588 |
0.086 |
4-hydroxy-2,6,6-trimethyl-3-oxocyclohexene-1-carbaldehyde |
0.027 |
0.001 |
0.001 |
0.001 |
0.014 |
0.001 |
0.989 |
0.031 |
DMNT |
0.001 |
0.000 |
0.006 |
0.091 |
0.046 |
0.003 |
0.856 |
0.024 |
2,2,6-trimethylcyclohexane-1,4-dione |
0.056 |
0.000 |
0.001 |
0.000 |
0.028 |
0.001 |
0.976 |
0.006 |
propylcyclopropane |
0.022 |
0.041 |
0.013 |
0.088 |
0.055 |
0.027 |
0.544 |
0.317 |
3,5,5-trimethylhex-2-ene |
0.003 |
0.085 |
0.146 |
0.190 |
0.097 |
0.116 |
0.514 |
0.738 |
unknown benzenoid |
0.003 |
0.046 |
0.032 |
0.240 |
0.121 |
0.039 |
0.840 |
0.047 |
oct-1-en-3-ol |
3.202 |
6.101 |
6.170 |
3.506 |
3.354 |
6.135 |
0.974 |
0.033 |
linalool oxide (pyranoid) ketone |
4.956 |
2.194 |
2.972 |
0.086 |
2.521 |
2.583 |
0.452 |
0.921 |
linalool oxide (pyranoid) |
4.035 |
0.288 |
0.427 |
0.024 |
2.030 |
0.358 |
0.856 |
0.000 |
(E)-hex-3-enal |
0.000 |
0.027 |
0.090 |
0.279 |
0.140 |
0.059 |
0.459 |
0.121 |
linalool oxide (furanoid) |
2.134 |
0.257 |
0.326 |
0.020 |
1.077 |
0.292 |
0.841 |
0.000 |
hexanal |
0.595 |
0.322 |
0.489 |
0.805 |
0.700 |
0.406 |
0.428 |
0.023 |
(E)-hex-2-enal |
0.000 |
0.037 |
0.050 |
0.355 |
0.178 |
0.043 |
0.886 |
0.014 |
linalool |
0.041 |
0.001 |
0.046 |
0.020 |
0.031 |
0.024 |
0.282 |
0.784 |
(Z)-hex-3-en-1-ol |
0.053 |
1.238 |
2.377 |
5.913 |
2.983 |
1.807 |
0.565 |
0.330 |
heptane-2,3-dione |
0.003 |
0.237 |
0.158 |
0.392 |
0.198 |
0.198 |
0.524 |
1.000 |
α-phellandrene |
0.355 |
0.033 |
0.020 |
0.016 |
0.185 |
0.027 |
0.922 |
0.043 |
p-cymene |
0.075 |
0.002 |
0.005 |
0.013 |
0.044 |
0.004 |
0.932 |
0.009 |
emm.vols <- tests.emm %>% filter(!variable %in% names(diversity_vars))
signif.num <- function(x) {
cut(x, breaks = c(0, 0.001, 0.01, 0.05, 1),
labels = c("***", "**", "*", " "), include.lowest = T)
}
fvolhybn.top %>% mutate(variable=factor(variable, levels=boxplot.order)) %>%
filter(!variable %in% names(diversity_vars)) %>%
ggplot() + geom_boxplot(aes(x=Species, y=value, color=Species), width=0.5, outlier.size=1) +
geom_text(data=tests.vols, aes(label=signif.num(parmean.p.value), x=2.5, y=pmax(par.mean,hyb.mean)*1.5)) +
geom_point(data=emm.vols, aes(x=Species, y=pmax(estimate,0), color=Species), shape=4, size=2)+
geom_point(data=tests.vols, aes(x=2.5, y=par.mean, shape="parmean"), size=2)+
geom_point(data=tests.vols, aes(x=2.5, y=hyb.mean, shape="hybmean"), size=2)+
scale_shape_manual("Mean",values=c(parmean=3, hybmean=5), labels=c(parmean="Species", hybmean="Hybrids"))+
facet_wrap(vars(variable), scales="free_y", ncol=4, labeller = labeller(variable = label_wrap_gen(18))) +
scale_color_manual("", values=hybcolr, labels=hyblabs) +
scale_y_sqrt("Emission rate (ng/flower/hr)", limits=c(0,NA)) + theme_minimal() +
theme(axis.text.x=element_blank(), axis.title.x=element_blank(),
panel.grid.major.x=element_blank(), panel.grid.minor.y=element_blank(), legend.position = "top")

Hybrid additivity vs. differences between parents
library(ggrepel)
vol_floor <- 1e-5 #avoid negative estimates, 10x less than smallest positive estimate
tests.vols %>% mutate(lfc_species = log2(pmax(`S. kaalae`,vol_floor)/pmax(`S. hookeri`,vol_floor)),
lfc_hybrids = log2(hyb.mean/par.mean)) %>% ungroup() %>%
left_join(chemsf %>% mutate(variable=fct_recode(Name, !!!multiline))) %>%
mutate(Class = Class %>% str_remove("s$") %>% recode(!!!class_subs) %>% factor()) %>%
ggplot(aes(x=lfc_species, y=lfc_hybrids, color=Class)) +
geom_vline(xintercept=0, color="grey40") + geom_hline(yintercept=0, color="grey40") +
geom_point(aes(shape = parmean.p.value < 0.05), size=2.5) + scale_shape_manual(values=c(1,19), guide="none")+
geom_text_repel(aes(label=variable), size=3.5, min.segment.length=5, show.legend = F) +
scale_color_manual(values=classcolr) +
scale_x_continuous(expand=expansion(c(0.1, 0.25)))+
theme_minimal() + theme(panel.grid.minor = element_blank(), legend.position = "top") +
labs(x="log2 fold change in emissions between parents (S. kaalae / S. hookeri)",
y="log2 fold change in emissions between hybrids and mean of parents",color="")

Total emissions and diversity
tests.diversity <- tests.all %>% filter(variable %in% names(diversity_vars)) %>%
mutate(across(contains("p.value"), ~p.adjust(.x, method="fdr")))
tests.diversity %>% select(-c(maximum, hyb.index)) %>% kable(caption = "tests of total emissions and diversity", digits=3)
tests of total emissions and diversity
ftot |
17.036 |
12.131 |
15.204 |
15.147 |
16.091 |
13.667 |
0.555 |
0.444 |
num_compounds |
18.250 |
18.833 |
19.524 |
19.969 |
19.109 |
19.179 |
0.696 |
0.949 |
shannon |
1.721 |
1.638 |
1.742 |
2.016 |
1.869 |
1.690 |
0.261 |
0.002 |
emm.diversity <- tests.emm %>% filter(variable %in% names(diversity_vars))
fvolhybn.top %>% filter(variable %in% names(diversity_vars)) %>%
ggplot() + geom_boxplot(aes(x=Species, y=value, color=Species), width=0.5, outlier.size=1) +
geom_text(data=tests.diversity, aes(label=signif.num(parmean.p.value), x=2.5, y=pmax(par.mean,hyb.mean)*1.1)) +
geom_point(data=emm.diversity, aes(x=Species, y=pmax(estimate,0), color=Species), shape=4, size=2)+
geom_point(data=tests.diversity, aes(x=2.5, y=par.mean, shape="parmean"), size=2)+
geom_point(data=tests.diversity, aes(x=2.5, y=hyb.mean, shape="hybmean"), size=2)+
scale_shape_manual("Mean",values=c(parmean=3, hybmean=5), labels=c(parmean="Species", hybmean="Hybrids"))+
facet_wrap(vars(variable), scales="free_y", ncol=4, labeller = as_labeller(diversity_vars)) +
scale_color_manual("", values=hybcolr, labels=hyblabs) +
scale_y_continuous("", limits=c(0,NA)) + theme_minimal() +
theme(axis.text.x=element_blank(), axis.title.x=element_blank(),
panel.grid.major.x=element_blank(), panel.grid.minor.y=element_blank(), legend.position = "top")

NMDS ordination
Each sample (includes resamples)
set.seed(1)
mdshybn <- metaMDS(decostand(fvolhybn, method="hellinger"),k=2,try=200, autotransform=F, trace=0)
mdshybn
Call:
metaMDS(comm = decostand(fvolhybn, method = "hellinger"), k = 2, try = 200, autotransform = F, trace = 0)
global Multidimensional Scaling using monoMDS
Data: decostand(fvolhybn, method = "hellinger")
Distance: bray
Dimensions: 2
Stress: 0.1208547
Stress type 1, weak ties
Best solution was repeated 3 times in 20 tries
The best solution was from try 7 (random start)
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'decostand(fvolhybn, method = "hellinger")'
hullcross <- gg_ordiplot(mdshybn, groups = svhybn$plants, plot=F)$df_hull
hullparents <- gg_ordiplot(mdshybn, groups = svhybn$parents, plot=F)$df_hull
hullplantsi <- gg_ordiplot(mdshybn, groups = svhybn$plantsi, plot=F)$df_hull
hullplantsinflo <- gg_ordiplot(mdshybn, groups = svhybn$plantsinflo, plot=F)$df_hull
hullplantsinflodate <- gg_ordiplot(mdshybn, groups = svhybn$plantsinflodate, plot=F)$df_hull
obj <- fortify(mdshybn)
obj$Sample <- NA; obj$Sample[obj$score=="sites"] <- svhybn$Sample
obj <- obj %>% left_join(svhybn %>% dplyr::select(c(Sample, SpeciesR, plants, parents, plantsi, plantsinflo, plantsinflodate, StartSunset))) %>% mutate(StartSunset = as.numeric(StartSunset))
obj$occur <- NA; obj$occur[obj$score=="species"] <- colSums(pavolhybn)
obj$ftotal <- NA; obj$ftotal[obj$score=="species"] <- colSums(fvolhybn)
obj <- obj %>% arrange(NMDS2, NMDS1)
hybnplot <- ggplot(obj, aes(x=-NMDS1, y=NMDS2)) + xlab("NMDS1") +
geom_path(data=filter(obj, score=="sites"),aes(group=plants, color="cross"), linewidth=1) +
geom_path(data=filter(obj, score=="sites"),aes(group=parents, color="parents"), linewidth=1) +
geom_path(data=filter(obj, score=="sites"),aes(group=plantsi, color="plantsi"), linewidth=1) +
geom_path(data=filter(obj, score=="sites"),aes(group=plantsinflo, color="plantsinflo"), linewidth=1) +
geom_path(data=filter(obj, score=="sites"),aes(group=plantsinflodate, color="plantsinflodate"), linewidth=1) +
scale_color_manual("",breaks=c("plantsinflodate", "plantsinflo", "plantsi", "parents", "cross"),
labels= c("Bag", "Infloresence","Plant", "Clone", "Cross"),
values=c(cross="grey80", parents="grey50", plantsi="grey20",
plantsinflo="goldenrod",plantsinflodate="#E41A1C",sites="white",species=NA)) +
coord_fixed(xlim=range(obj$NMDS1[obj$score=="sites"])+c(-0.42,0.45),
ylim=range(obj$NMDS2[obj$score=="sites"])) + theme_pubr()+
scale_x_continuous(expand=c(0,0)) +
theme(legend.text = element_text(size=13)) +
guides(fill = guide_legend(override.aes = list(size=6)), color=guide_legend(override.aes=list(size=4)))
(hybplotn.spec <- hybnplot +
scale_fill_manual("", values=hybcolr, labels=hyblabs, na.translate=F) +
geom_point(data=obj[obj$score=="sites",], aes(fill=SpeciesR), shape=21, size=3, color="white") +
geom_text(data=obj[obj$score=="species" & obj$occur>nrow(volhybn)*minprop,], aes(label=label), color="black", size=3.8, fontface=2) )

Mean of each plant
set.seed(1)
mdshybn.mean <- metaMDS(decostand(fvolhybn.plants, method="hellinger"),k=2,try=100, autotransform=F, trace=0)
mdshybn.mean
Call:
metaMDS(comm = decostand(fvolhybn.plants, method = "hellinger"), k = 2, try = 100, autotransform = F, trace = 0)
global Multidimensional Scaling using monoMDS
Data: decostand(fvolhybn.plants, method = "hellinger")
Distance: bray
Dimensions: 2
Stress: 0.118831
Stress type 1, weak ties
Best solution was repeated 2 times in 20 tries
The best solution was from try 0 (metric scaling or null solution)
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'decostand(fvolhybn.plants, method = "hellinger")'
obj.mean <- fortify(mdshybn.mean)
obj.mean$SpeciesR <- factor(NA,levels=levels(svhybn.plants$SpeciesR)); obj.mean$SpeciesR[obj.mean$score=="sites"] <- svhybn.plants$SpeciesR
obj.mean$plants <- factor(NA,levels=unique(svhybn.plants$plants)); obj.mean$plants[obj.mean$score=="sites"] <- svhybn.plants$plants
#obj.mean$parents <- factor(NA,levels=levels(svhybn.plants$parents)); obj.mean$parents[obj.mean$score=="sites"] <- svhybn.plants$parents
obj.mean$occur <- NA; obj.mean$occur[obj.mean$score=="species"] <- colSums(pavolhybn)
obj.mean$ftotal <- NA; obj.mean$ftotal[obj.mean$score=="species"] <- colSums(fvolhybn.plants)
obj.mean <- obj.mean %>% arrange(NMDS2, NMDS1)
(hybplotn.mean.spec <-
ggplot(obj.mean, aes(x=NMDS1, y=NMDS2)) +
coord_fixed(xlim=range(obj.mean$NMDS1[obj.mean$score=="sites"])+c(-0.55,0.1),
ylim=range(obj.mean$NMDS2[obj.mean$score=="sites"])) +
theme_pubr()+
scale_x_continuous(expand=c(0,0))+
theme(legend.text = element_text(size=13)) +
guides(fill = guide_legend(override.aes = list(size=6)), color=guide_legend(override.aes=list(size=4)))+
scale_fill_manual("", values=hybcolr, labels=hyblabs, na.translate=F) +
scale_color_manual("",breaks=c("cross","parents"), labels=c("Cross","Clone"), values=c(cross="grey80")) +
geom_path(data=obj.mean[obj.mean$score=="sites",],aes(group=plants, color="cross"), size=1) +
#geom_path(data=obj.mean[obj.mean$score=="sites",],aes(group=parents, color="parents"), size=1) +
geom_point(data=obj.mean[obj.mean$score=="sites",], aes(fill=SpeciesR), shape=21, size=3, color="white") +
geom_text(data=obj.mean[obj.mean$score=="species" & obj.mean$occur>nrow(volhybn)*minprop,], aes(label=label), color="black", alpha=1, size=3.8, fontface=2)
)

Direction of cross PERMANOVA
hybridf.plants <- svhybn.plants$SpeciesR %in% c("K x H", "H x K")
adonis2(decostand(fvolhybn.plants[hybridf.plants,], method="total")~ SpeciesR, data = svhybn[hybridf.plants,])
Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999
adonis2(formula = decostand(fvolhybn.plants[hybridf.plants, ], method = "total") ~ SpeciesR, data = svhybn[hybridf.plants, ])
Df SumOfSqs R2 F Pr(>F)
Model 2 0.03414 0.01556 0.237 0.979
Residual 30 2.16030 0.98444
Total 32 2.19444 1.00000
Variation at different levels
Variation among infloresences within plants
To measure between-trap variation, two traps were inserted into one
bag, for two bags enclosing two infloresences on one plant (four traps
total). Average them to get a dataset of unique infloresences.
bd.plantsi.inflo <- betadisper(vegdist(decostand(fvolhybn.inflo, method="hellinger"), method="bray"),
group = svhybn.inflo$plantsi)
meandist.plantsi.inflo <- bd.plantsi.inflo$group.distances %>% enframe(name="plantsi", value="centroid_dist") %>%
left_join(count(svhybn, plantsi)) %>% filter(n>1)#plants with resamples
summary(meandist.plantsi.inflo$centroid_dist)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1027 0.1327 0.1388 0.1581 0.1770 0.2716
Variation among plants in each cross type
#betadisper(vegdist(decostand(volhybn, method="hellinger"), method="bray"), group = specpn) #sample level
(bd.SpeciesR <- betadisper(vegdist(decostand(fvolhybn.plants, method="hellinger"), method="bray"),
group = svhybn.plants$SpeciesR))
Homogeneity of multivariate dispersions
Call: betadisper(d = vegdist(decostand(fvolhybn.plants, method =
"hellinger"), method = "bray"), group = svhybn.plants$SpeciesR)
No. of Positive Eigenvalues: 51
No. of Negative Eigenvalues: 45
Average distance to median:
S. kaalae K x H H x K S. hookeri
0.2071 0.1944 0.2443 0.3252
Eigenvalues for PCoA axes:
(Showing 8 of 96 eigenvalues)
PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8
6.3652 2.3196 0.9928 0.8622 0.5520 0.4399 0.3716 0.3383
anova(bd.SpeciesR)
Analysis of Variance Table
Response: Distances
Df Sum Sq Mean Sq F value Pr(>F)
Groups 3 0.27694 0.092312 14.847 5.644e-08 ***
Residuals 93 0.57825 0.006218
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(bd.SpeciesR)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = distances ~ group, data = df)
$group
diff lwr upr p adj
K x H-S. kaalae -0.01268834 -0.08251652 0.05713984 0.9643323
H x K-S. kaalae 0.03713597 -0.02079667 0.09506861 0.3416208
S. hookeri-S. kaalae 0.11808392 0.06651239 0.16965545 0.0000002
H x K-K x H 0.04982431 -0.02482515 0.12447378 0.3060649
S. hookeri-K x H 0.13077226 0.06094408 0.20060044 0.0000238
S. hookeri-H x K 0.08094795 0.02301531 0.13888059 0.0023642
tibble(group=bd.SpeciesR$group, distance = bd.SpeciesR$distances) %>%
bind_rows(bd.plantsi.inflo$distances %>% enframe(value="distance") %>% filter(distance!=0) %>% mutate(group="within")) %>%
mutate(group = fct_relevel(group, c(levels(svhybn$SpeciesR), "within"))) %>%
ggplot(aes(x=group, y=distance, color=group)) +
geom_boxplot(outlier.shape = NA) + geom_jitter(width=0.15, height=0) + geom_vline(xintercept = 4.5) +
scale_color_manual(values=hybcolr, guide="none") +
scale_y_continuous(limits=c(0,NA), expand = expansion(c(0,0.05))) + scale_x_discrete(labels=c(hyblabs, "Within plant")) +
theme_classic() + theme(axis.title.x=element_blank(), axis.ticks.x=element_blank()) + labs(y="Bray-Curtis distance to centroid")
