figs <- list.files("./hybrids_cut_plots", pattern=".svg")
walk(figs, ~system(paste0("inkscape --export-dpi 150 -o ./hybrids_cut_plots/png/", str_replace(.x, ".svg",".png"), " ./hybrids_cut_plots/", .x)))
walk(figs, ~system(paste0("inkscape -o ./hybrids_cut_plots/eps/", str_replace(.x, ".svg",".eps"), " ./hybrids_cut_plots/", .x)))
greenhouse <- matrix(c(-117.8475746, 33.6472914), nrow=1) #latlong of UCI greenhouse

s <- read_tsv("data/UCI_GCMS/Schiedea Volatiles Sampling - Samples.tsv") %>% 
  drop_na(Start, Stop) %>% #2 bad samples with no Start, 1 missing Stop in 2019
  nest(.by=SampleDate) %>% #sunset function is slow, run on unique dates
  mutate(sunset = map_vec(SampleDate, 
                          ~maptools::sunriset(dateTime=ymd_hms(paste(.x, "13:00:00")),
                                              direction="sunset", crds=greenhouse, POSIXct.out=T)$time),
         solarnoon = map_vec(SampleDate, 
                          ~maptools::solarnoon(dateTime=ymd_hms(paste(.x, "13:00:00")),
                                              crds=greenhouse, POSIXct.out=T)$time)) %>% unnest(data) %>% 
  mutate(StartSunset = difftime(ymd_hms(paste(SampleDate, Start)), sunset, units="hours"),
         StopSunset =  difftime(ymd_hms(paste(SampleDate, Stop)), sunset, units="hours"),
         StartNoon =   difftime(ymd_hms(paste(SampleDate, Start)), solarnoon, units="hours"))

CASsubs <- read_tsv("data/UCI_GCMS/CASsubs3.csv") %>% select(CAS, newCAS) %>% deframe()

data <- read_tsv("data/UCI_GCMS/svhyb_ambient_blanks.csv") %>% 
  distinct() %>% #generated some reports twice
  select(FileName:`Base Peak`, Weighted, Reverse) %>% 
  rename(Path = FileName, Fraction = Amount, Match = Weighted) %>% 
  mutate(Path = str_sub(Path, 52, -5), #remove base directory and file extension
         SampleDate = str_extract(Path, "\\\\(.*)", group=1), #file name after folder
         Sample = if_else(str_sub(SampleDate, 1, 5)=="BLANK", SampleDate, 
          str_extract(SampleDate, "(.*?)_.*", group=1) %>% #drop GC index
            #pad sample number to 3 digits with zeros
            str_replace("(\\d+)", function(x) formatC(as.integer(x), width=3, format="d", flag="0")) %>% 
            str_replace("^KAHO", "SKAHO")),
         CAS = str_replace(CAS, "(\\d{2,7})(\\d{2})(\\d{1})$","\\1-\\2-\\3") %>% #add dashes
           recode(!!!CASsubs),#manual identifications
         Name = str_remove(Name, "^>"),
         across(c(SampleDate, Path, Model, Sample, Name, CAS), factor),
         across(c(Fraction, Purity, `Min. Abund.`), ~ as.numeric(str_remove(.x, "%"))/100)) %>% 
  #AMDIS Base Peak only has the most common ion integrated
  #instead, multiply fractions by total peak areas from sum_chromatograms.R
  left_join(read_tsv("data/UCI_GCMS/svhyb_TICsums.csv") %>% 
              mutate(SampleDate = str_extract(file, "(.*).CDF", group=1))) %>% 
  mutate(Area = Fraction * sumTIC)

chems.pc <- read_tsv("data/UCI_GCMS/chemspc3.csv") %>% select(CAS, IUPAC.Name) %>% mutate(IUPAC.Name = str_replace(IUPAC.Name, "~\\{(.*?)\\}", "\\1"))

Filtering

#Load filtered scent data (includes octenol and hexenol) and metadata
# load("./data/UCI_GCMS/sdata3_hexoct.Rdata")
# TODO some red flags in the Notes column - possible wrong IDs
# sv$spec <- spec
# save(sv, vol, file = "./data/UCI_GCMS/sdata3_hexoct_svvol.Rdata")
load("./data/UCI_GCMS/sdata3_hexoct_svvol.Rdata")

#convert to nanograms per hour using standards for each compound class
vocs <- colnames(vol)
chemsf <- tibble(shortname = vocs) %>%# two VOCs got renamed since table was made 
  bind_cols(read_tsv("data/UCI_GCMS/chemspc3_classes.csv")) %>% 
  left_join(read_tsv("data/UCI_GCMS/multipliers.csv"))
vol <- sweep(vol, 2, chemsf$ngPerArea, FUN= `*`)

Sample inventory

eve <- -2.5 #hours before sunset
hybcolr <- c(`S. kaalae` = "#31A354", `K x H` = "#FD8D3C", `H x K` = "#A65628", `S. hookeri` = "#756BB1")
hyblabs <- c(expression(italic("S. kaalae")),"K x H", "H x K", expression(italic("S. hookeri")))
renamepops <- c(`879WKG` = "879", WK = "794", WK = "866", WK = "891", WK = "899", #lump Waianae Kai populations
                `3587WP` = "3587", `892WKG` = "892", `904WPG` = "904")
kahopops <- unique(names(renamepops))
hybpopsp <- c(K = "3587WP", H = "879WKG", K = "881KHV", K = "892WKG", K = "904WPG",
              HH = "HH", HK = "HK", KH = "KH", KK = "KK", H = "WK")

#Get maternal and paternal populations of crosses
kahomp <- sv %>% rownames_to_column() %>% filter(Species == "kaho") %>% select(rowname, Plant) %>% 
  separate(Plant, into = c("Maternal", "Paternal"), sep=" x ") %>% 
  mutate(across(ends_with("aternal"), ~str_split_i(.x, "-", 1) %>% 
                  fct_recode(!!!renamepops) %>% fct_relevel(kahopops)))

#Add columns to metadata describing the plant
svhyb <- sv %>% rownames_to_column() %>% left_join(kahomp) %>% 
  mutate(specp = case_match(Population, "KK" ~ "KAAL", "HH" ~ "HOOK", .default=spec) %>% 
           fct_relevel(c("HOOK","KAHO","KAAL")), #Split cross offspring (spec=KAHO) into parents and hybrids 
         Population2 = factor(if_else(Species == "kaho", if_else(Maternal==Paternal, Maternal, "Interpop"), Population)),
         SpeciesR = if_else(specp=="KAHO", Population, specp) %>% 
           factor(levels=c("KAAL","KH","HK","HOOK")) %>% 
           fct_recode("S. kaalae"="KAAL", "K x H"="KH", "H x K"="HK", "S. hookeri"="HOOK"),
         DN = factor(if_else(StartSunset > eve, "Night", "Day")),
         Inflo = if_else(is.na(Inflo), as.character(1:n()), Inflo),
         Diurnal = if_else(is.na(Diurnal), as.character(1:n()), Diurnal),
         plants =          paste(Population, Plant, sep="-"),
         plantsi =         paste(Population, Plant, Cutting, sep="-"),
         plantsinflo =     paste(Population, Plant, Cutting, Inflo, sep="-"),
         plantsinflodate = paste(Population, Plant, Cutting, Inflo, SampleDate, sep="-"),
         parents = if_else(str_detect(plants, fixed(" x ")), as.character(1:n()), plants),
         ftot = rowSums(vol) / Flrs) %>% 
  arrange(specp, Population)

svhybn <- filter(svhyb, DN == "Night")
#"parents" set the crosses to an integer sequence to force them not to group 
#TODO this unfortunately splits resamples of the sample cross plant into multiple "parents"
#TODO also, NMDS below implies these are different cross parents but they are really just the original plant in the parent generation that was used for cuttings
volhybn <- vol[svhybn$rowname,] #emission rate, nanograms per hour
pavolhybn <- decostand(volhybn, method = "pa") #presence/absence
fvolhybn <- volhybn / svhybn$Flrs #emission rate per flower

#first take the average for each infloresence
#Inflos are marked by a letter but those are reused arbitrarily across SampleDates
svhybn.inflo <- bind_cols(svhybn, fvolhybn) %>% 
 group_by(SpeciesR, Species, Population, Maternal, Paternal, plants, plantsi, Inflo, SampleDate) %>% 
  summarize(across(all_of(vocs), mean), .groups = "drop")
fvolhybn.inflo <- svhybn.inflo[,vocs]

#now average infloresences by plant
#TODO temporarily got rid of parents since it was splitting multiple inflos on same plant
svhybn.plants <- svhybn.inflo %>% 
 group_by(SpeciesR, Species, Population, Maternal, Paternal, plants, plantsi) %>% 
  summarize(across(all_of(vocs), mean), .groups = "drop")
fvolhybn.plants <- svhybn.plants[,vocs]

Samples by cross

There are also four 881KHV samples (not used to make crosses since from the Ko’olaus), and two samples without populations (an HH and a KH).

inventory.cross <- svhybn %>% filter(Species == "kaho") %>%
  drop_na(Maternal, Paternal) %>% #two samples where parent populations are unknown
  with(table(Maternal, Paternal)) 
inventory.all <- inventory.cross + diag(table(svhybn$Population)[rownames(inventory.cross)])
kable(inventory.all, caption="evening samples of crosses and parents")
evening samples of crosses and parents
879WKG WK 3587WP 892WKG 904WPG
879WKG 10 2 8 1 3
WK 5 18 4 6 6
3587WP 3 3 9 0 6
892WKG 0 0 4 5 1
904WPG 4 2 2 1 6
#how many maternal and paternal parent plants?
mp <- svhybn %>% filter(Species =="kaho") %>% select(Plant) %>% mutate(Plant=as.character(Plant)) %>%
  separate(Plant, sep=" x ", into = c("Maternal", "Paternal")) %>% 
  separate(Maternal, sep="-", into = c("MomPop", "MomPlant"), remove=F, extra="merge") %>% 
  separate(Paternal, sep="-", into = c("DadPop", "DadPlant"), remove=F, extra="merge")
mp %>% group_by(MomPop) %>% summarize(moms=n_distinct(MomPlant)) %>% kable(caption="maternal plants")
maternal plants
MomPop moms
3587 5
794 2
866 1
879 4
892 3
899 1
904 3
NA 1
mp %>% group_by(DadPop) %>% summarize(moms=n_distinct(DadPlant)) %>% kable(caption="paternal plants")
paternal plants
DadPop moms
3587 4
794 2
866 2
879 4
892 3
899 1
904 3
NA 1

Resampled plants

svhybn.inflo %>% count(SpeciesR, plantsi) %>% filter(n>1) %>% kable(caption = "number of infloresences per plant")
number of infloresences per plant
SpeciesR plantsi n
S. kaalae 881KHV-2-Self 8 2
S. kaalae 892WKG-3-NA 2
S. kaalae KK-3587-15 x 904-3-218-4 3
S. kaalae KK-3587-15 x 904-3-218-5 2
S. kaalae KK-892-1 x 3587-7-120-1 2
K x H KH-3587-7 x 879-10-1-189-3 2
H x K HK-794-4 x 892-3-81-5 2
H x K HK-794-4 x 904-5-85-4 2
H x K HK-879-10-1 x 3587-C-30-2 4
S. hookeri HH-866-2 x 879-2-2-89-6 2
S. hookeri WK-2-Clone 1 2
S. hookeri WK-3-11– 2
S. hookeri WK-Loc 2-Clone 2 2

Evening plants by cross

remember to add the 3 881KHV plants (not used to make crosses since from the Ko’olaus)

there are also two plants from crosses without populations (HH and KH) - added these to 879x879 and 892xWK in the inventory

plants.cross <- svhybn.plants %>% filter(Species == "kaho") %>%
  drop_na(Maternal, Paternal) %>% #two samples where parent populations are unknown
  with(table(Maternal, Paternal)) 
kable(plants.cross, caption="cross plants")
cross plants
879WKG WK 3587WP 892WKG 904WPG
879WKG 9 2 3 1 3
WK 4 6 4 5 5
3587WP 2 3 5 0 3
892WKG 0 0 3 3 1
904WPG 4 2 2 1 4
plants.parent <- diag(table(svhybn.plants$Population)[rownames(plants.cross)])
rownames(plants.parent) <- rownames(plants.cross); colnames(plants.parent) <- colnames(plants.cross)
kable(plants.parent, caption="parent plants")
parent plants
879WKG WK 3587WP 892WKG 904WPG
879WKG 1 0 0 0 0
WK 0 9 0 0 0
3587WP 0 0 4 0 0
892WKG 0 0 0 1 0
904WPG 0 0 0 0 2

Sampling times

print("night pumping times, hrs:")
[1] "night pumping times, hrs:"
summary(as.numeric(as.difftime(format.POSIXct(svhybn$SampleDateStart, tz="GMT+8", format="%H:%M"),format="%H:%M"))) #convert PDT to PST when needed
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  16.40   18.77   19.45   19.30   20.02   21.27 
print("night pumping times, hr since sunset:")
[1] "night pumping times, hr since sunset:"
summary(as.numeric(svhybn$StartSunset))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -2.434   1.196   1.895   1.702   2.482   4.545 

Heatmap

class_subs <- c("Irregular terpene"="Other terpenoid", "Monoterpene"="Monoterpenoid")
classcolr <- c(Aliphatic="#BC0060",Benzenoid="#027B8C",Monoterpenoid="#5d9400",`Other terpenoid`="#E56E00", Sesquiterpene="#000000")

#1 dimensional NMDS for ordering samples in heatmap
set.seed(1)
mdshybn1 <- metaMDS(decostand(fvolhybn.plants, method="hellinger"),k=1,try=200, autotransform=F, trace=0)
srt <- order(svhybn.plants$SpeciesR, -mdshybn1$points, decreasing=T)

#Include volatiles found in at least this many samples
mincount <- 15

callback = function(hc, mat){
    sv = svd(t(mat))$v[,1]
    dend = reorder(as.dendrogram(hc), wts = sv)
    as.hclust(dend)
}
heat.mat <- as.matrix(t(fvolhybn.plants[,colSums(pavolhybn)>mincount][srt,]))^(1/3)
colnames(heat.mat) <- rownames(fvolhybn.plants)[srt]
ph <- pheatmap(heat.mat, 
         cluster_cols=F, show_colnames=F,
         clustering_method="mcquitty", clustering_distance_rows="correlation",
         clustering_callback = function(hc, ...){dendsort(hc, type="average")},
         scale="none", color=inferno(512),
         annotation_col = data.frame(Species=as.integer(svhybn.plants$SpeciesR)[srt], row.names=rownames(fvolhybn.plants)[srt]), 
         annotation_colors = list(Species=hybcolr, Class=classcolr), annotation_names_col = F,
         gaps_col = which(as.logical(diff(as.integer(svhybn.plants$SpeciesR)[srt]))),
         annotation_row = data.frame(Class=chemsf$Class %>% str_remove("s$") %>% recode(!!!class_subs) %>% factor(), 
                                     row.names = chemsf$shortname), annotation_names_row = F,
   cellwidth = 3.5, cellheight = 14, fontsize = 10, border_color = NA, legend=F, legend_breaks=NA, annotation_legend=F, cutree_rows=5
)
downViewport("col_annotation.3-3-3-3")
middles <- enframe(svhybn.plants$SpeciesR[srt]) %>% mutate(name=(name-0.5)/max(name-0.5)) %>% summarize(name=mean(name), .by=value) %>% pull(name)
grid.text(rev(levels(svhyb$SpeciesR))[c(1,4)], x=middles[c(1,4)],y=0.57, gp=gpar(fontsize=10, col="white", fontface=4))
grid.text(rev(levels(svhyb$SpeciesR))[c(2,3)], x=middles[c(2,3)],y=0.57, gp=gpar(fontsize=10, col="white", fontface=2))

Boxplots of each volatile

diversity_vars <- c(ftot = "Total volatile emissions (ng/flr/hr)", 
                    num_compounds="Number of compounds", 
                    shannon="Shannon diversity index")
svhybn.plants$ftot <- rowSums(fvolhybn.plants)
svhybn.plants$num_compounds <- rowSums(decostand(fvolhybn.plants, method="pa"))
svhybn.plants$shannon <- diversity(fvolhybn.plants, index="shannon")

multiline <- c("DMNT"="(3E)-4,8-dimethylnona-1,3,7-triene",
               "2,2,6-trimethylcyclohexane-\n1,4-dione"="2,2,6-trimethylcyclohexane-1,4-dione",
               "4-hydroxy-2,6,6-trimethyl-\n3-oxocyclohexene-\n1-carbaldehyde"=
                 "4-hydroxy-2,6,6-trimethyl-3-oxocyclohexene-1-carbaldehyde")

minprop <- 0.25 #compound must occur in greater than this proportion of evening samples
fvolhybn.top <- fvolhybn.plants %>% 
  select(where(~sum(.x>0) > length(.x) * minprop)) %>% 
  bind_cols(select(svhybn.plants, Species=SpeciesR, all_of(names(diversity_vars)))) %>% 
  pivot_longer(!Species, names_to="variable") %>% 
  mutate(variable = fct_recode(variable, !!!multiline))

#compare mean of hybrid cross directions to mean of parent species
#compare hybrid cross directions to each other
tests.glht <- fvolhybn.top %>% group_by(variable) %>% nest() %>% 
  mutate(model = map(data, ~lm(value ~ Species, data=.x)),
         emm = map(model, ~tidy(summary(emmeans(.x, specs="Species")))),
         test.direction = map(model, ~tidy(multcomp::glht(.x, linfct = multcomp::mcp(Species=c("`K x H` - `H x K` == 0"))))),
         test.parmean = map(model, ~tidy(multcomp::glht(.x, linfct = multcomp::mcp(Species=c("0.5 * `K x H` + 0.5 * `H x K` - 0.5 * `S. kaalae` - 0.5 * `S. hookeri`== 0"))))))

tests.emm <- tests.glht %>% mutate(maximum = map_dbl(data, ~max(.x$value))) %>% 
  select(variable, emm, maximum) %>% unnest(emm)
tests.index <- tests.emm %>% 
  select(variable, Species, maximum, estimate) %>% pivot_wider(names_from=Species, values_from=estimate) %>% 
  mutate(par.mean = (`S. hookeri` + `S. kaalae`)/2,
         hyb.mean = (`K x H`+`H x K`)/2,
         hyb.index = if_else(`S. kaalae`> `S. hookeri`, 
                             (hyb.mean -`S. hookeri`)/ `S. kaalae`, 
                             (hyb.mean -`S. kaalae`)/ `S. hookeri`))
tests.direction <- tests.glht %>% select(variable, test.direction) %>% unnest(test.direction) %>% 
  select(variable, direction.p.value = adj.p.value)
tests.parmean <- tests.glht %>% select(variable, test.parmean) %>% unnest(test.parmean) %>% 
  select(variable, parmean.p.value = adj.p.value)
boxplot.order <- tests.index %>% arrange(-hyb.index) %>% pull(variable)
tests.all <- tests.index %>% left_join(tests.direction) %>% left_join(tests.parmean) %>%
  mutate(variable=factor(variable, levels=boxplot.order))

tests.vols <- tests.all %>% filter(!variable %in% names(diversity_vars)) %>% 
    mutate(across(contains("p.value"), ~p.adjust(.x, method="fdr")))
tests.vols %>% select(-c(maximum, hyb.index)) %>% mutate(variable=str_remove_all(variable, "\n")) %>% 
  kable(caption = "tests of total emissions and diversity", digits=3)
tests of total emissions and diversity
variable S. kaalae K x H H x K S. hookeri par.mean hyb.mean direction.p.value parmean.p.value
benzaldehyde 0.079 0.033 0.041 0.311 0.195 0.037 0.915 0.001
octan-3-one 0.454 0.839 1.183 0.709 0.581 1.011 0.398 0.084
4-oxoisophorone 0.132 0.003 0.009 0.000 0.066 0.006 0.884 0.017
indole 0.002 0.031 0.016 0.137 0.070 0.024 0.634 0.021
2-phenylacetaldehyde 0.297 0.006 0.004 0.006 0.151 0.005 0.994 0.208
methyl 2-aminobenzoate 0.000 0.020 0.005 0.086 0.043 0.012 0.588 0.086
4-hydroxy-2,6,6-trimethyl-3-oxocyclohexene-1-carbaldehyde 0.027 0.001 0.001 0.001 0.014 0.001 0.989 0.031
DMNT 0.001 0.000 0.006 0.091 0.046 0.003 0.856 0.024
2,2,6-trimethylcyclohexane-1,4-dione 0.056 0.000 0.001 0.000 0.028 0.001 0.976 0.006
propylcyclopropane 0.022 0.041 0.013 0.088 0.055 0.027 0.544 0.317
3,5,5-trimethylhex-2-ene 0.003 0.085 0.146 0.190 0.097 0.116 0.514 0.738
unknown benzenoid 0.003 0.046 0.032 0.240 0.121 0.039 0.840 0.047
oct-1-en-3-ol 3.202 6.101 6.170 3.506 3.354 6.135 0.974 0.033
linalool oxide (pyranoid) ketone 4.956 2.194 2.972 0.086 2.521 2.583 0.452 0.921
linalool oxide (pyranoid) 4.035 0.288 0.427 0.024 2.030 0.358 0.856 0.000
(E)-hex-3-enal 0.000 0.027 0.090 0.279 0.140 0.059 0.459 0.121
linalool oxide (furanoid) 2.134 0.257 0.326 0.020 1.077 0.292 0.841 0.000
hexanal 0.595 0.322 0.489 0.805 0.700 0.406 0.428 0.023
(E)-hex-2-enal 0.000 0.037 0.050 0.355 0.178 0.043 0.886 0.014
linalool 0.041 0.001 0.046 0.020 0.031 0.024 0.282 0.784
(Z)-hex-3-en-1-ol 0.053 1.238 2.377 5.913 2.983 1.807 0.565 0.330
heptane-2,3-dione 0.003 0.237 0.158 0.392 0.198 0.198 0.524 1.000
α-phellandrene 0.355 0.033 0.020 0.016 0.185 0.027 0.922 0.043
p-cymene 0.075 0.002 0.005 0.013 0.044 0.004 0.932 0.009
emm.vols <- tests.emm %>% filter(!variable %in% names(diversity_vars)) 

signif.num <- function(x) {
   cut(x, breaks = c(0, 0.001, 0.01, 0.05, 1), 
           labels = c("***", "**", "*", " "), include.lowest = T)
}
fvolhybn.top %>% mutate(variable=factor(variable, levels=boxplot.order)) %>% 
  filter(!variable %in% names(diversity_vars)) %>% 
  ggplot() +  geom_boxplot(aes(x=Species, y=value, color=Species), width=0.5, outlier.size=1) + 
  geom_text(data=tests.vols, aes(label=signif.num(parmean.p.value), x=2.5, y=pmax(par.mean,hyb.mean)*1.5)) + 
  geom_point(data=emm.vols, aes(x=Species, y=pmax(estimate,0), color=Species), shape=4, size=2)+
  geom_point(data=tests.vols, aes(x=2.5, y=par.mean, shape="parmean"), size=2)+
  geom_point(data=tests.vols, aes(x=2.5, y=hyb.mean, shape="hybmean"), size=2)+
  scale_shape_manual("Mean",values=c(parmean=3, hybmean=5), labels=c(parmean="Species", hybmean="Hybrids"))+
  facet_wrap(vars(variable), scales="free_y", ncol=4, labeller = labeller(variable = label_wrap_gen(18))) +
  scale_color_manual("", values=hybcolr, labels=hyblabs) +
  scale_y_sqrt("Emission rate (ng/flower/hr)", limits=c(0,NA)) + theme_minimal() + 
  theme(axis.text.x=element_blank(), axis.title.x=element_blank(), 
        panel.grid.major.x=element_blank(), panel.grid.minor.y=element_blank(), legend.position = "top")

Hybrid additivity vs. differences between parents

library(ggrepel)
vol_floor <- 1e-5 #avoid negative estimates, 10x less than smallest positive estimate
tests.vols %>%  mutate(lfc_species = log2(pmax(`S. kaalae`,vol_floor)/pmax(`S. hookeri`,vol_floor)), 
                       lfc_hybrids = log2(hyb.mean/par.mean)) %>% ungroup() %>% 
  left_join(chemsf %>% mutate(variable=fct_recode(Name, !!!multiline))) %>% 
  mutate(Class = Class %>% str_remove("s$") %>% recode(!!!class_subs) %>% factor()) %>% 
  ggplot(aes(x=lfc_species, y=lfc_hybrids, color=Class)) + 
  geom_vline(xintercept=0, color="grey40") + geom_hline(yintercept=0, color="grey40") + 
  geom_point(aes(shape = parmean.p.value < 0.05), size=2.5) + scale_shape_manual(values=c(1,19), guide="none")+
  geom_text_repel(aes(label=variable), size=3.5, min.segment.length=5, show.legend = F)  +
  scale_color_manual(values=classcolr) +
  scale_x_continuous(expand=expansion(c(0.1, 0.25)))+ 
  theme_minimal() + theme(panel.grid.minor = element_blank(), legend.position = "top") +
  labs(x="log2 fold change in emissions between parents (S. kaalae / S. hookeri)",
       y="log2 fold change in emissions between hybrids and mean of parents",color="")

Total emissions and diversity

tests.diversity <- tests.all %>% filter(variable %in% names(diversity_vars)) %>% 
    mutate(across(contains("p.value"), ~p.adjust(.x, method="fdr"))) 
tests.diversity %>% select(-c(maximum, hyb.index)) %>% kable(caption = "tests of total emissions and diversity", digits=3)
tests of total emissions and diversity
variable S. kaalae K x H H x K S. hookeri par.mean hyb.mean direction.p.value parmean.p.value
ftot 17.036 12.131 15.204 15.147 16.091 13.667 0.555 0.444
num_compounds 18.250 18.833 19.524 19.969 19.109 19.179 0.696 0.949
shannon 1.721 1.638 1.742 2.016 1.869 1.690 0.261 0.002
emm.diversity <- tests.emm %>% filter(variable %in% names(diversity_vars)) 

fvolhybn.top %>% filter(variable %in% names(diversity_vars)) %>% 
  ggplot() +  geom_boxplot(aes(x=Species, y=value, color=Species), width=0.5, outlier.size=1) + 
  geom_text(data=tests.diversity, aes(label=signif.num(parmean.p.value), x=2.5, y=pmax(par.mean,hyb.mean)*1.1)) + 
  geom_point(data=emm.diversity, aes(x=Species, y=pmax(estimate,0), color=Species), shape=4, size=2)+
  geom_point(data=tests.diversity, aes(x=2.5, y=par.mean, shape="parmean"), size=2)+
  geom_point(data=tests.diversity, aes(x=2.5, y=hyb.mean, shape="hybmean"), size=2)+
  scale_shape_manual("Mean",values=c(parmean=3, hybmean=5), labels=c(parmean="Species", hybmean="Hybrids"))+
  facet_wrap(vars(variable), scales="free_y", ncol=4, labeller = as_labeller(diversity_vars)) +
  scale_color_manual("", values=hybcolr, labels=hyblabs) + 
  scale_y_continuous("", limits=c(0,NA)) + theme_minimal() + 
  theme(axis.text.x=element_blank(), axis.title.x=element_blank(), 
        panel.grid.major.x=element_blank(), panel.grid.minor.y=element_blank(), legend.position = "top")

NMDS ordination

Each sample (includes resamples)

set.seed(1)
mdshybn <- metaMDS(decostand(fvolhybn, method="hellinger"),k=2,try=200, autotransform=F, trace=0)
mdshybn

Call:
metaMDS(comm = decostand(fvolhybn, method = "hellinger"), k = 2,      try = 200, autotransform = F, trace = 0) 

global Multidimensional Scaling using monoMDS

Data:     decostand(fvolhybn, method = "hellinger") 
Distance: bray 

Dimensions: 2 
Stress:     0.1208547 
Stress type 1, weak ties
Best solution was repeated 3 times in 20 tries
The best solution was from try 7 (random start)
Scaling: centring, PC rotation, halfchange scaling 
Species: expanded scores based on 'decostand(fvolhybn, method = "hellinger")' 
hullcross <- gg_ordiplot(mdshybn, groups = svhybn$plants, plot=F)$df_hull
hullparents <- gg_ordiplot(mdshybn, groups = svhybn$parents, plot=F)$df_hull
hullplantsi <- gg_ordiplot(mdshybn, groups = svhybn$plantsi, plot=F)$df_hull
hullplantsinflo <- gg_ordiplot(mdshybn, groups = svhybn$plantsinflo, plot=F)$df_hull
hullplantsinflodate <- gg_ordiplot(mdshybn, groups = svhybn$plantsinflodate, plot=F)$df_hull

obj <- fortify(mdshybn)
obj$Sample <- NA; obj$Sample[obj$score=="sites"] <-  svhybn$Sample
obj <- obj %>% left_join(svhybn %>% dplyr::select(c(Sample, SpeciesR, plants, parents, plantsi, plantsinflo, plantsinflodate, StartSunset))) %>% mutate(StartSunset = as.numeric(StartSunset))
obj$occur <- NA; obj$occur[obj$score=="species"] <- colSums(pavolhybn)
obj$ftotal <- NA; obj$ftotal[obj$score=="species"] <- colSums(fvolhybn)
obj <- obj %>% arrange(NMDS2, NMDS1)

hybnplot <- ggplot(obj, aes(x=-NMDS1, y=NMDS2)) + xlab("NMDS1") +
    geom_path(data=filter(obj, score=="sites"),aes(group=plants, color="cross"), linewidth=1) +
    geom_path(data=filter(obj, score=="sites"),aes(group=parents, color="parents"), linewidth=1) +
    geom_path(data=filter(obj, score=="sites"),aes(group=plantsi, color="plantsi"), linewidth=1) +
    geom_path(data=filter(obj, score=="sites"),aes(group=plantsinflo, color="plantsinflo"), linewidth=1) +
    geom_path(data=filter(obj, score=="sites"),aes(group=plantsinflodate, color="plantsinflodate"), linewidth=1) +
  scale_color_manual("",breaks=c("plantsinflodate", "plantsinflo", "plantsi", "parents", "cross"),
                     labels=   c("Bag",             "Infloresence","Plant",   "Clone",   "Cross"),
                     values=c(cross="grey80", parents="grey50", plantsi="grey20",
                              plantsinflo="goldenrod",plantsinflodate="#E41A1C",sites="white",species=NA)) +
      coord_fixed(xlim=range(obj$NMDS1[obj$score=="sites"])+c(-0.42,0.45), 
                  ylim=range(obj$NMDS2[obj$score=="sites"])) + theme_pubr()+
    scale_x_continuous(expand=c(0,0)) +
    theme(legend.text = element_text(size=13)) + 
    guides(fill = guide_legend(override.aes = list(size=6)), color=guide_legend(override.aes=list(size=4)))

(hybplotn.spec <- hybnplot + 
        scale_fill_manual("", values=hybcolr, labels=hyblabs,  na.translate=F) +
  geom_point(data=obj[obj$score=="sites",], aes(fill=SpeciesR), shape=21, size=3, color="white") +
    geom_text(data=obj[obj$score=="species" & obj$occur>nrow(volhybn)*minprop,], aes(label=label), color="black", size=3.8, fontface=2) ) 

Mean of each plant

set.seed(1)
mdshybn.mean <- metaMDS(decostand(fvolhybn.plants, method="hellinger"),k=2,try=100, autotransform=F, trace=0)
mdshybn.mean

Call:
metaMDS(comm = decostand(fvolhybn.plants, method = "hellinger"),      k = 2, try = 100, autotransform = F, trace = 0) 

global Multidimensional Scaling using monoMDS

Data:     decostand(fvolhybn.plants, method = "hellinger") 
Distance: bray 

Dimensions: 2 
Stress:     0.118831 
Stress type 1, weak ties
Best solution was repeated 2 times in 20 tries
The best solution was from try 0 (metric scaling or null solution)
Scaling: centring, PC rotation, halfchange scaling 
Species: expanded scores based on 'decostand(fvolhybn.plants, method = "hellinger")' 
obj.mean <- fortify(mdshybn.mean)
obj.mean$SpeciesR <- factor(NA,levels=levels(svhybn.plants$SpeciesR)); obj.mean$SpeciesR[obj.mean$score=="sites"] <-  svhybn.plants$SpeciesR
obj.mean$plants <- factor(NA,levels=unique(svhybn.plants$plants)); obj.mean$plants[obj.mean$score=="sites"] <-  svhybn.plants$plants
#obj.mean$parents <- factor(NA,levels=levels(svhybn.plants$parents)); obj.mean$parents[obj.mean$score=="sites"] <-  svhybn.plants$parents
obj.mean$occur <- NA; obj.mean$occur[obj.mean$score=="species"] <- colSums(pavolhybn)
obj.mean$ftotal <- NA; obj.mean$ftotal[obj.mean$score=="species"] <- colSums(fvolhybn.plants)
obj.mean <- obj.mean %>% arrange(NMDS2, NMDS1)

(hybplotn.mean.spec <- 
    ggplot(obj.mean, aes(x=NMDS1, y=NMDS2)) + 
    coord_fixed(xlim=range(obj.mean$NMDS1[obj.mean$score=="sites"])+c(-0.55,0.1), 
                ylim=range(obj.mean$NMDS2[obj.mean$score=="sites"])) + 
    theme_pubr()+
    scale_x_continuous(expand=c(0,0))+
    theme(legend.text = element_text(size=13)) + 
    guides(fill = guide_legend(override.aes = list(size=6)), color=guide_legend(override.aes=list(size=4)))+
    scale_fill_manual("", values=hybcolr, labels=hyblabs,  na.translate=F) +
      scale_color_manual("",breaks=c("cross","parents"), labels=c("Cross","Clone"), values=c(cross="grey80")) +
    geom_path(data=obj.mean[obj.mean$score=="sites",],aes(group=plants, color="cross"), size=1) +
    #geom_path(data=obj.mean[obj.mean$score=="sites",],aes(group=parents, color="parents"), size=1) +
    geom_point(data=obj.mean[obj.mean$score=="sites",], aes(fill=SpeciesR), shape=21, size=3, color="white") +
    geom_text(data=obj.mean[obj.mean$score=="species" & obj.mean$occur>nrow(volhybn)*minprop,], aes(label=label), color="black",  alpha=1, size=3.8, fontface=2) 
    ) 

Direction of cross PERMANOVA

hybridf.plants <- svhybn.plants$SpeciesR %in% c("K x H", "H x K")
adonis2(decostand(fvolhybn.plants[hybridf.plants,], method="total")~ SpeciesR, data = svhybn[hybridf.plants,])
Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = decostand(fvolhybn.plants[hybridf.plants, ], method = "total") ~ SpeciesR, data = svhybn[hybridf.plants, ])
         Df SumOfSqs      R2     F Pr(>F)
Model     2  0.03414 0.01556 0.237  0.979
Residual 30  2.16030 0.98444             
Total    32  2.19444 1.00000             

Variation at different levels

Variation among infloresences within plants

To measure between-trap variation, two traps were inserted into one bag, for two bags enclosing two infloresences on one plant (four traps total). Average them to get a dataset of unique infloresences.

bd.plantsi.inflo <-  betadisper(vegdist(decostand(fvolhybn.inflo, method="hellinger"), method="bray"), 
                           group = svhybn.inflo$plantsi)
meandist.plantsi.inflo <- bd.plantsi.inflo$group.distances %>% enframe(name="plantsi", value="centroid_dist") %>%
  left_join(count(svhybn, plantsi)) %>% filter(n>1)#plants with resamples
summary(meandist.plantsi.inflo$centroid_dist)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1027  0.1327  0.1388  0.1581  0.1770  0.2716 

Variation among plants in each cross type

#betadisper(vegdist(decostand(volhybn, method="hellinger"), method="bray"), group = specpn) #sample level
(bd.SpeciesR <- betadisper(vegdist(decostand(fvolhybn.plants, method="hellinger"), method="bray"), 
                           group = svhybn.plants$SpeciesR))

    Homogeneity of multivariate dispersions

Call: betadisper(d = vegdist(decostand(fvolhybn.plants, method =
"hellinger"), method = "bray"), group = svhybn.plants$SpeciesR)

No. of Positive Eigenvalues: 51
No. of Negative Eigenvalues: 45

Average distance to median:
 S. kaalae      K x H      H x K S. hookeri 
    0.2071     0.1944     0.2443     0.3252 

Eigenvalues for PCoA axes:
(Showing 8 of 96 eigenvalues)
 PCoA1  PCoA2  PCoA3  PCoA4  PCoA5  PCoA6  PCoA7  PCoA8 
6.3652 2.3196 0.9928 0.8622 0.5520 0.4399 0.3716 0.3383 
anova(bd.SpeciesR)
Analysis of Variance Table

Response: Distances
          Df  Sum Sq  Mean Sq F value    Pr(>F)    
Groups     3 0.27694 0.092312  14.847 5.644e-08 ***
Residuals 93 0.57825 0.006218                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(bd.SpeciesR)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = distances ~ group, data = df)

$group
                            diff         lwr        upr     p adj
K x H-S. kaalae      -0.01268834 -0.08251652 0.05713984 0.9643323
H x K-S. kaalae       0.03713597 -0.02079667 0.09506861 0.3416208
S. hookeri-S. kaalae  0.11808392  0.06651239 0.16965545 0.0000002
H x K-K x H           0.04982431 -0.02482515 0.12447378 0.3060649
S. hookeri-K x H      0.13077226  0.06094408 0.20060044 0.0000238
S. hookeri-H x K      0.08094795  0.02301531 0.13888059 0.0023642
tibble(group=bd.SpeciesR$group, distance = bd.SpeciesR$distances) %>% 
  bind_rows(bd.plantsi.inflo$distances %>% enframe(value="distance") %>% filter(distance!=0) %>% mutate(group="within")) %>%
  mutate(group = fct_relevel(group, c(levels(svhybn$SpeciesR), "within"))) %>% 
  ggplot(aes(x=group, y=distance, color=group)) +
  geom_boxplot(outlier.shape = NA) + geom_jitter(width=0.15, height=0) + geom_vline(xintercept = 4.5) + 
  scale_color_manual(values=hybcolr, guide="none") + 
  scale_y_continuous(limits=c(0,NA), expand = expansion(c(0,0.05))) + scale_x_discrete(labels=c(hyblabs, "Within plant")) + 
  theme_classic() + theme(axis.title.x=element_blank(), axis.ticks.x=element_blank()) + labs(y="Bray-Curtis distance to centroid")