This notebook estimates the indicators based on the raw clean data
and perfomrs the main analyses and figures used in the manuscript of the
multicountry paper. The input is the “clean kobo output” that was first
cleaned by 2_cleaning.Rmd
. Besides the plots and statistics
shown below, the output is are the indicators data ready to be used to
estimate the indicators (ind[1-3]_data.csv
), a single file
with the indicators already calculated
(indicators_full.csv
) and metadata.
Packages and functions
Load required libraries:
library(tidyr)
library(dplyr)
library(readr)
library(utile.tools)
library(stringr)
library(ggplot2)
library(ggsankey)
library(ggnewscale)
library(alluvial)
library(viridis)
library(cowplot)
library(lme4)
library(knitr)
library(glmmTMB)
Load required functions. These custom fuctions are available at: https://github.com/AliciaMstt/GeneticIndicators
source("get_indicator1_data.R")
source("get_indicator2_data.R")
source("get_indicator3_data.R")
source("get_metadata.R")
source("transform_to_Ne.R")
source("estimate_indicator1.R")
Other custom functions:
### not in
'%!in%' <- function(x,y)!('%in%'(x,y))
#' Duplicates data to create additional facet. Thanks to https://stackoverflow.com/questions/18933575/easily-add-an-all-facet-to-facet-wrap-in-ggplot2
#' @param df a dataframe
#' @param col the name of facet column
#'
CreateAllFacet <- function(df, col){
df$facet <- df[[col]]
temp <- df
temp$facet <- "all"
merged <-rbind(temp, df)
# ensure the facet value is a factor
merged[[col]] <- as.factor(merged[[col]])
return(merged)
}
Custom colors:
## IUCN official colors
# Assuming order of levels is: "re", "cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown" (for regional, and w/o "re" for global). Make sure to change the levels to that order before plotting. Note: NE should be "#FFFFFF" but since that means white I'm suing azure2
IUCNcolors<-c("#FF0000", "#FFA500", "#FFFF00", "#ADFF2F", "#008000", "#808080", "azure2", "bisque1")
IUCNcolors_regional<-c("darkorchid2", "#FF0000", "#FFA500", "#FFFF00", "#ADFF2F", "#008000", "#808080", "azure2", "bisque1")
## nice soft ramp for taxonomic groups
taxoncolors<-cividis(12) # same than using cividis(length(levels(as.factor(metadata$taxonomic_group))))
## Colors for simplified methods to define populations
# assuming the levels (see how this was created in the section "Simplify combinations of methods to define populations"): of running levels(as.factor(ind2_data$defined_populations_simplified)) (after new order)
# get a set of colors to highlight genetic and geographic with similar colors
simplifiedmethods_colors<-c("#FFA07A", #"dispersal_buffer"
"#7f611b", # "eco_biogeo_proxies"
"#668cd1", # "genetic_clusters"
"#668cd1", # "genetic_clusters eco_biogeo_proxies"
"#45c097", # "genetic_clusters geographic_boundaries"
"#d4b43e", # "geographic_boundaries"
"#d4b43e", # "geographic_boundaries eco_biogeo_proxies"
"#d4b43e", # "geographic_boundaries management_units"
"#b34656", # "management_units"
"#be72c9", # "other"
"#be72c9")# "other_combinations"
grouped_taxon_colors<-c("#a46cb7", "#7aa457", "#cb6a49")
Get data
Get indicators and metadata data from clean kobo output
# Get data:
kobo_clean<-read.csv(file="kobo_output_clean.csv", header=TRUE)
# Extract indicator 1 data from kobo output, show most relevant columns
ind1_data<-get_indicator1_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind1_data[,c(1:3, 12:14)])
# Extract Proportion of maintained populations (indicator) data from kobo output, show most relevant columns
ind2_data<-get_indicator2_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind2_data[,c(1:3, 9:10,13)])
# Extract indicator 3 data from kobo output, show most relevant columns
ind3_data<-get_indicator3_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind3_data[,c(1:3, 9:11)])
# extract metadata, show most relevant columns
metadata<-get_metadata(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(metadata[,c(1:3, 12, 25,26, 64)])
Get population data for those species assessed using the tabular text
template instead of Kobo. This file was produced by the script
1.2_cleaning.Rmd
ind1_data_from_templates<-read.csv(file="ind1_data_from_templates.csv")
Add data recorded using the population template to the ind1_data
already in the nice format.
ind1_data<-rbind(ind1_data, ind1_data_from_templates)
Estimate indicators
Indicator 1 or Ne 500 indicator (proportion of populations with Ne
>500):
Show most relevant columns of indicator 1 data
ind1_data %>% select(country_assessment, taxon, population, Name, Ne, NeLower, NeUpper, NeYear, GeneticMarkers, NcType, NcMethod, NcRange)
Remember what the function to transform NcRange and NcPoint data into
Ne does:
# check what the custom funciton does
transform_to_Ne
## function (ind1_data, ratio = 0.1)
## {
## ratio = ratio
## if (!is.numeric(ratio) || ratio < 0 || ratio > 1) {
## stop("Invalid argument. Please provide a number within the range 0 to 1, using `.` to delimit decimals.")
## }
## else {
## ind1_data = ind1_data
## ind1_data <- ind1_data %>% mutate(Nc_from_range = case_when(NcRange ==
## "more_5000_bymuch" ~ 10000, NcRange == "more_5000" ~
## 5500, NcRange == "less_5000_bymuch" ~ 500, NcRange ==
## "less_5000" ~ 4050, NcRange == "range_includes_5000" ~
## 5001)) %>% mutate(Ne_from_Nc = case_when(!is.na(NcPoint) ~
## NcPoint * ratio, !is.na(Nc_from_range) ~ Nc_from_range *
## ratio)) %>% mutate(Ne_combined = if_else(is.na(Ne),
## Ne_from_Nc, Ne)) %>% mutate(Ne_calculated_from = if_else(is.na(Ne),
## if_else(!is.na(NcPoint), "NcPoint ratio", if_else(!is.na(Nc_from_range),
## "NcRange ratio", NA_character_)), "genetic data"))
## print(ind1_data)
## }
## }
Use function to get Ne data from NcRange or NcPoint data, and their
combination (Ne estimated from Ne if Ne is available, otherwise, from
Nc)
ind1_data<-transform_to_Ne(ind1_data = ind1_data, ratio = 0.1)
## # A tibble: 5,652 × 40
## country_assessme… taxonomic_group taxon scientific_auth… genus year_assesment
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 2 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 3 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 4 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 5 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 6 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 7 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 8 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 9 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 10 sweden bird Dend… Bechstein 1803 Dend… 2022
## # … with 5,642 more rows, and 34 more variables: name_assessor <chr>,
## # email_assessor <chr>, kobo_tabular <chr>, defined_populations <chr>,
## # time_populations <chr>, X_validation_status <chr>, X_uuid <chr>,
## # multiassessment <chr>, population <chr>, Name <chr>, Origin <chr>,
## # IntroductionYear <chr>, Ne <dbl>, NeLower <dbl>, NeUpper <dbl>,
## # NeYear <chr>, GeneticMarkers <chr>, GeneticMarkersOther <chr>,
## # MethodNe <chr>, SourceNe <chr>, NcType <chr>, NcYear <chr>, …
Check transformation in example:
ind1_data %>% select(country_assessment, taxon, population, Name, Ne, NeLower, NeUpper, NeYear, GeneticMarkers, NcType, NcMethod, NcRange, Nc_from_range, Ne_from_Nc, Ne_combined)
Remember what the function to estimate indicator 1 does:
# check what the custom function does
estimate_indicator1
## function (ind1_data)
## {
## indicator1 <- ind1_data %>% group_by(X_uuid, ) %>% summarise(n_pops = n(),
## n_pops_Ne_data = sum(!is.na(Ne_combined)), n_pops_more_500 = sum(Ne_combined >
## 500, na.rm = TRUE), indicator1 = n_pops_more_500/n_pops_Ne_data) %>%
## left_join(metadata)
## print(indicator1)
## }
Now estimate indicator 1 :)
indicator1<-estimate_indicator1(ind1_data = ind1_data)
## Joining, by = "X_uuid"
## # A tibble: 609 × 69
## X_uuid n_pops n_pops_Ne_data n_pops_more_500 indicator1 country_assessm…
## <chr> <int> <int> <int> <dbl> <chr>
## 1 010d85cd-5… 2 1 1 1 united_states
## 2 018d6a54-b… 47 46 0 0 united_states
## 3 019bd95f-b… 1 1 0 0 sweden
## 4 01b10b29-9… 1 1 1 1 south_africa
## 5 0301e6b3-b… 3 3 3 1 france
## 6 037d6c8f-7… 4 2 2 1 united_states
## 7 03f03179-1… 1 1 1 1 south_africa
## 8 0586b61e-7… 12 12 0 0 belgium
## 9 065a53ba-0… 1 1 0 0 south_africa
## 10 06e6bb50-3… 1 1 0 0 belgium
## # … with 599 more rows, and 63 more variables: taxonomic_group <chr>,
## # taxon <chr>, scientific_authority <chr>, genus <chr>, year_assesment <chr>,
## # name_assessor <chr>, email_assessor <chr>, common_name <chr>,
## # kobo_tabular <chr>, X_validation_status <chr>, GBIF_taxonID <int>,
## # NCBI_taxonID <chr>, national_taxonID <chr>, source_national_taxonID <chr>,
## # other_populations <chr>, time_populations <chr>, defined_populations <chr>,
## # source_definition_populations <chr>, map_populations <chr>, …
Check example data:
indicator1 %>% select(X_uuid, taxon, country_assessment, n_pops, n_pops_Ne_data, n_pops_more_500, indicator1)
Indicator 2 or PM indicator: proportion of populations within
species which are maintained.
Show most relevant columns of indicator 2 data:
ind2_data %>% select(country_assessment, taxon, n_extant_populations, n_extint_populations)
Proportion of maintained populations (indicator) is the he proportion
of populations within species which are maintained. This can be
estimated based on the n_extant_populations
and
n_extint_populations
, as follows:
ind2_data$indicator2<- ind2_data$n_extant_populations / (ind2_data$n_extant_populations + ind2_data$n_extint_populations)
Example output selecting the most relevant columns:
ind2_data %>% select(country_assessment, taxon, n_extant_populations, n_extint_populations, indicator2)
Indicator 3 or genetic monitoring indicator: number of species with
genetic diversity monitoring
Example of indicator 3 data selecting the most relevant columns:
ind3_data %>% select(country_assessment, taxon, multiassessment, temp_gen_monitoring, gen_studies, gen_monitoring_years, source_genetic_studies)
Indicator 3 refers to the number (count) of taxa by country in which
genetic monitoring is occurring. This is stored in the variable
temp_gen_monitoring
as a “yes/no” answer for each taxon, so
to estimate the indicator, we only need to count how many said “yes”,
keeping only one of the records when the taxon was multiassessed.
indicator3<-ind3_data %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, temp_gen_monitoring) %>%
filter(!duplicated(.)) %>%
# count "yes" in tem_gen_monitoring by country
filter(temp_gen_monitoring=="yes") %>%
group_by(country_assessment) %>%
summarise(n_taxon_gen_monitoring= n())
Example output:
indicator3
Join indicators and metadata in a single table
It could be useful to have the PM and Ne 500 indicators and the
metadata in a single large table.
indicators_full<-left_join(metadata, indicator1) %>%
left_join(ind2_data)
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "common_name", "kobo_tabular", "X_validation_status",
## "X_uuid", "GBIF_taxonID", "NCBI_taxonID", "national_taxonID",
## "source_national_taxonID", "other_populations", "time_populations",
## "defined_populations", "source_definition_populations", "map_populations",
## "map_populations_URL", "habitat_decline_area", "source_populations",
## "popsize_data", "ne_pops_exists", "nc_pops_exists", "ratio_exists",
## "species_related", "ratio_species_related", "ratio_year",
## "source_popsize_ratios", "species_comments", "realm", "IUCN_habitat",
## "other_habitat", "national_endemic", "transboundary_type", "other_explain",
## "country_proportion", "species_range", "rarity", "occurrence_extent",
## "occurrence_area", "pop_fragmentation_level", "species_range_comments",
## "global_IUCN", "regional_redlist", "other_assessment_status",
## "other_assessment_name", "source_status_distribution", "fecundity",
## "semelparous_offpring", "reproductive_strategy", "reproductive_strategy_other",
## "adult_age_data", "other_reproductive_strategy", "longevity_max",
## "longevity_median", "longevity_maturity", "longevity_age",
## "life_history_based_on", "life_history_sp_basedon", "sources_life_history",
## "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "other_populations",
## "time_populations", "defined_populations", "source_definition_populations",
## "map_populations", "map_populations_URL", "habitat_decline_area",
## "source_populations", "multiassessment")
Check number of rows of each object, i.e. n including alternative
assessments
# Number of populations inc alternative assessments
nrow(ind1_data)
## [1] 5652
# Number of assessments including multiassessments (these 3 objects should have the same number of rows)
nrow(indicators_full)
## [1] 966
nrow(ind2_data)
## [1] 966
nrow(ind3_data)
## [1] 966
nrow(metadata)
## [1] 966
Save indicators data
Save indicators data and metadata to csv files, useful for analyses
outside R.
# save processed data
write.csv(ind1_data, "ind1_data.csv", row.names = FALSE)
write.csv(indicators_full, "indicators_full.csv", row.names = FALSE)
write.csv(ind2_data, "ind2_data.csv", row.names = FALSE)
write.csv(ind3_data, "ind3_data.csv", row.names = FALSE)
write.csv(metadata, "metadata.csv", row.names = FALSE)
Change country name to nicer labels
To have nice levels in the plots we will change the way country names
are written:
# make factor
metadata$country_assessment<-as.factor(metadata$country_assessment)
indicators_full$country_assessment<-as.factor(indicators_full$country_assessment)
ind2_data$country_assessment<-as.factor(ind2_data$country_assessment)
ind1_data$country_assessment<-as.factor(ind1_data$country_assessment)
indicator1$country_assessment<-as.factor(indicator1$country_assessment)
# original levels
levels(metadata$country_assessment)
## [1] "australia" "belgium" "colombia" "france"
## [5] "japan" "mexico" "south_africa" "sweden"
## [9] "united_states"
# change
levels(metadata$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA")
levels(indicators_full$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA")
levels(ind1_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA")
levels(ind2_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA")
levels(indicator1$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA")
Simplify combinations of methods to define populations
The methods used to define populations come from a check box question
were one or more of the following categories can be selected:
genetic_clusters, geographic_boundaries, eco_biogeo_proxies,
adaptive_traits, management_units, other. As a consequence any
combination of the former can be possible. Leading to the following
frequency table:
table(indicators_full$defined_populations)
##
## adaptive_traits
## 5
## adaptive_traits management_units
## 1
## dispersal_buffer
## 159
## dispersal_buffer adaptive_traits
## 2
## dispersal_buffer eco_biogeo_proxies
## 1
## dispersal_buffer other
## 1
## eco_biogeo_proxies
## 41
## eco_biogeo_proxies adaptive_traits
## 3
## eco_biogeo_proxies dispersal_buffer
## 7
## eco_biogeo_proxies management_units
## 3
## eco_biogeo_proxies other
## 2
## genetic_clusters
## 107
## genetic_clusters adaptive_traits
## 7
## genetic_clusters dispersal_buffer
## 11
## genetic_clusters eco_biogeo_proxies
## 20
## genetic_clusters eco_biogeo_proxies adaptive_traits
## 3
## genetic_clusters eco_biogeo_proxies adaptive_traits management_units
## 2
## genetic_clusters eco_biogeo_proxies management_units
## 1
## genetic_clusters geographic_boundaries
## 70
## genetic_clusters geographic_boundaries adaptive_traits
## 5
## genetic_clusters geographic_boundaries eco_biogeo_proxies
## 8
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits
## 1
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits management_units
## 1
## genetic_clusters geographic_boundaries eco_biogeo_proxies management_units
## 1
## genetic_clusters geographic_boundaries management_units
## 8
## genetic_clusters management_units
## 5
## genetic_clusters other
## 2
## geographic_boundaries
## 268
## geographic_boundaries adaptive_traits
## 12
## geographic_boundaries adaptive_traits management_units other
## 1
## geographic_boundaries dispersal_buffer
## 1
## geographic_boundaries eco_biogeo_proxies
## 114
## geographic_boundaries eco_biogeo_proxies adaptive_traits
## 3
## geographic_boundaries eco_biogeo_proxies management_units
## 3
## geographic_boundaries eco_biogeo_proxies other
## 2
## geographic_boundaries management_units
## 24
## geographic_boundaries other
## 12
## management_units
## 29
## management_units other
## 1
## other
## 19
It is hard to group the above methods, so we will keep the original
groups with n >=19 in the above list, and tag the combinations that
appear few times as as “other_combinations”.
Which groups have n>=19?
x<-as.data.frame(table(indicators_full$defined_populations)[table(indicators_full$defined_populations) >= 19])
colnames(x)[1]<-"method"
x
We can add this new column to the metadata and indicator data:
### for indicators
indicators_full<- indicators_full %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "other_combinations"))
### for meta
metadata<- metadata %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "other_combinations"))
### for ind1 raw data
ind1_data<- ind1_data %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "other_combinations"))
Check n for simplified methods:
table(indicators_full$defined_populations_simplified)
##
## dispersal_buffer
## 159
## eco_biogeo_proxies
## 41
## genetic_clusters
## 107
## genetic_clusters eco_biogeo_proxies
## 20
## genetic_clusters geographic_boundaries
## 70
## geographic_boundaries
## 268
## geographic_boundaries eco_biogeo_proxies
## 114
## geographic_boundaries management_units
## 24
## management_units
## 29
## other
## 19
## other_combinations
## 115
Table of equivalences:
indicators_full %>%
select(defined_populations, defined_populations_simplified) %>%
filter(!duplicated(defined_populations))
Create nicer names for ploting
# original method names
levels(as.factor(indicators_full$defined_populations_simplified))
## [1] "dispersal_buffer"
## [2] "eco_biogeo_proxies"
## [3] "genetic_clusters"
## [4] "genetic_clusters eco_biogeo_proxies"
## [5] "genetic_clusters geographic_boundaries"
## [6] "geographic_boundaries"
## [7] "geographic_boundaries eco_biogeo_proxies"
## [8] "geographic_boundaries management_units"
## [9] "management_units"
## [10] "other"
## [11] "other_combinations"
# nicer names
nice_names <- c("dispersal buffer",
"eco- biogeographic proxies",
"genetic clusters",
"genetic clusters & eco- biogeographic proxies",
"genetic clusters & geographic boundaries",
"geographic boundaries",
"geographic boundaries & eco- biogeographic proxies",
"geographic boundaries & management units",
"management units",
"other",
"other combinations")
### add them
indicators_full$defined_populations_nicenames <- factor(
indicators_full$defined_populations_simplified,
levels = levels(as.factor(indicators_full$defined_populations_simplified)),
labels = nice_names)
# metadata
metadata$defined_populations_nicenames <- factor(
metadata$defined_populations_simplified,
levels = levels(as.factor(metadata$defined_populations_simplified)),
labels = nice_names)
#check names match
select(metadata, defined_populations_nicenames, defined_populations_simplified)
levels(indicators_full$defined_populations_nicenames)
## [1] "dispersal buffer"
## [2] "eco- biogeographic proxies"
## [3] "genetic clusters"
## [4] "genetic clusters & eco- biogeographic proxies"
## [5] "genetic clusters & geographic boundaries"
## [6] "geographic boundaries"
## [7] "geographic boundaries & eco- biogeographic proxies"
## [8] "geographic boundaries & management units"
## [9] "management units"
## [10] "other"
## [11] "other combinations"
Averaging multiassessments (alternative assessments)
Some taxa were assessed twice or more times, for example to account
for uncertainty on how to divide populations. This information is stored
in variable multiassessment
of the metadata (created by
get_metadata()
). An example of taxa with multiple
assessments:
metadata %>%
filter(multiassessment=="multiassessment") %>%
select(taxonomic_group, taxon, country_assessment, multiassessment) %>%
arrange(taxon, country_assessment) %>%
head()
Alternative assessments allow to account for uncertainty in the
number of populations or the size of them. We can examine how the
indicators value species by species as done elsewhere in these analyses
(see below “Values for indicator 1 and 2 for multiassessed species), but
to examine global trends, some of the figures below use the average.
The averages are stored in a different column, labeled
indicator[1 or 2]_mean
.
indicators_averaged<-indicators_full %>%
# group desired multiassessments
group_by(country_assessment, multiassessment, taxon) %>%
# estimate means
mutate(indicator1_mean=mean(indicator1, na.rm=TRUE)) %>%
mutate(indicator2_mean=mean(indicator2, na.rm=TRUE)) %>%
# change NaN for NA (needed due to the NAs and 0s in the dataset)
mutate_all(~ifelse(is.nan(.), NA, .))
## `mutate_all()` ignored the following grouping variables:
## • Columns `country_assessment`, `multiassessment`, `taxon`
## ℹ Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.
Examples of how this looks to check it was done properly. For
indicator 1:
A species assessed in different countries (Sweden and Belgium), with
alternative assessment within Sweden. The average is computed only for
the Swedish assessments
indicators_averaged %>%
filter(taxon == "Barbastella barbastellus") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
Alternative assessments within a single country:
indicators_averaged %>%
filter(taxon == "Rana dalmatina") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean, indicator2, indicator2_mean)
indicators_averaged %>%
filter(taxon == "Notophthalmus perstriatus") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean, indicator2, indicator2_mean)
If one of the alternative assessments has NA, it is removed from the
mean calculation, so that the average equals the value of the assessment
that has data:
indicators_averaged %>%
filter(taxon == "Alouatta palliata mexicana") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean, indicator2, indicator2_mean)
A species can have alternative assessments for both indicators:
indicators_averaged %>%
filter(taxon == "Aphelocoma coerulescens") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean, indicator2, indicator2_mean)
Because we will use the averages to show a single value for
multiasssessed taxa, we can keep only the first record for multiassessed
taxa.
indicators_averaged_one<-indicators_averaged[!duplicated(cbind(indicators_averaged$taxon, indicators_averaged$country_assessment)), ]
Example of how this looks:
indicators_averaged_one %>%
filter(taxon == "Aphelocoma coerulescens") %>%
select(taxon, country_assessment, multiassessment, indicator2, indicator2_mean, indicator1, indicator1_mean)
indicators_averaged_one %>%
filter(taxon == "Rana dalmatina") %>%
select(taxon, country_assessment, multiassessment, indicator2, indicator2_mean, indicator1, indicator1_mean)
indicators_averaged_one %>%
filter(taxon == "Notophthalmus perstriatus") %>%
select(taxon, country_assessment, multiassessment, indicator2, indicator2_mean, indicator1, indicator1_mean)
Total number of taxa (ie assessments averaging alternative
assessments):
nrow(indicators_averaged_one)
## [1] 919
General description of records and taxa assessed by country
Records by country, including taxa assessed more than once (see below
for details on this)
ggplot(metadata, aes(x=country_assessment)) +
geom_bar(stat = "count") +
xlab("") +
ggtitle("Number of taxa assessed by country, including taxa assed more than once") +
theme_light()
To explore what kind of taxa countries assessed regardless of if they
assessed them once or more, we are going to use the subset
indicators_averaged_one
, were we averaged the indicators
and kept only 1 record per assessment.
How many taxa were assessed (i.e. counting only once taxa that were
assessed multiple times)?
# how many?
nrow(indicators_averaged_one)
## [1] 919
Plot taxa assessed excluding duplicates, i.e. the real number of taxa
assessed:
p1<-ggplot(indicators_averaged_one, aes(x=country_assessment)) +
geom_bar(stat = "count") +
xlab("") +
ggtitle("Number of taxa assessed by country") +
theme_light()
p1
Of which countries and taxonomic groups are the taxa that were
assessed more than once?
p2<- indicators_averaged_one %>% # we use the _unique dataset so that multiassesed records are counted only once
filter(multiassessment=="multiassessment") %>%
ggplot(aes(x=taxonomic_group, fill=country_assessment)) +
geom_bar(stat = "count") +
theme(axis.text.x = element_text(angle = 45)) +
labs(fill="Country") +
xlab("") +
ggtitle("Number of taxa assessed more than once") +
theme_light()
p2
Total number of taxon with alternative assessments:
nrow(indicators_averaged_one %>% filter(multiassessment == "multiassessment"))
## [1] 44
Number of alternative assessments:
nrow(indicators_averaged%>% filter(multiassessment == "multiassessment"))
## [1] 91
How many alternative assessments there are per multiassessed
taxon?
x<-indicators_averaged %>% select(country_assessment, multiassessment, taxon) %>%
filter(multiassessment == "multiassessment") %>%
group_by(taxon) %>%
summarise(n=n())
kable(x)
Alasmidonta varicosa |
2 |
Alouatta palliata mexicana |
2 |
Ambystoma cingulatum |
2 |
Anguis fragilis |
2 |
Aphelocoma coerulescens |
3 |
Astragalus microcymbus |
2 |
Barbastella barbastellus |
2 |
Bombus terricola |
2 |
Cambarus elkensis |
2 |
Coronella austriaca |
2 |
Cryptobranchus alleganiensis alleganiensis |
2 |
Cryptomastix devia |
2 |
Erimystax harryi |
2 |
Etheostoma chienense |
2 |
Etheostoma osburni |
2 |
Hemphillia burringtoni |
2 |
Heterelmis stephani |
2 |
Hydroprogne caspia |
2 |
Lavinia exilicauda chi |
2 |
Lepidium papilliferum |
2 |
Mustela nigripes |
2 |
Necturus lewisi |
2 |
Nicrophorus americanus |
2 |
Notophthalmus perstriatus |
4 |
Notropis mekistocholas |
2 |
Notropis topeka |
2 |
Noturus munitus |
2 |
Obovaria subrotunda |
2 |
Oncorhynchus apache |
2 |
Oncorhynchus clarkii virginalis |
2 |
Phonotimpus talquian |
2 |
Pimelea spinescens subspecies spinescens |
2 |
Plestiodon egregius egregius |
2 |
Pleurobema rubrum |
2 |
Procambarus orcinus |
2 |
Pseudemys rubriventris |
2 |
Rana dalmatina |
2 |
Rhynchospora crinipes |
2 |
Streptanthus bracteatus |
2 |
Texella reyesi |
2 |
Thamnophis sirtalis tetrataenia |
2 |
Thoburnia atripinnis |
2 |
Toxolasma lividum |
2 |
Zapus hudsonius luteus |
2 |
mean(x$n)
## [1] 2.068182
sd(x$n)
## [1] 0.3339494
median(x$n)
## [1] 2
Heatmap of the taxa assessed by country (counting multiassessments
only once)
We aimed to represent different taxonomic groups within animals
(amphibians, birds, fishes, invertebrates, mammals and reptiles), plants
(angiosperms, bryophytes, gymnosperms and pteridophytes), fungi and
others (e.g. lichens). Order levels to represent those categories:
indicators_averaged_one$taxonomic_group<- factor(indicators_averaged_one$taxonomic_group,
levels = c("amphibian", "bird", "fish", "invertebrate", "mammal", "reptile", "angiosperm", "bryophyte", "gymnosperm", "pteridophytes", "fungus", "other"))
Make a heatmap
## Agregate data to get counts
agg_data <- indicators_averaged_one %>% # we use the _unique dataset so that multiassesed records are counted only once
filter(multiassessment!="multiassessment") %>%
group_by(country_assessment, taxonomic_group) %>%
summarize(count = n())
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# country names in desired order
agg_data$country_assessment <- factor(agg_data$country_assessment,
levels = rev(levels(agg_data$country_assessment)))
## Create a heat map
p_heat<- ggplot(agg_data, aes(x = taxonomic_group, y = country_assessment, fill = count)) +
geom_tile() +
scale_fill_gradient(low = "lightblue", high = "darkblue") + # Adjust color scale as needed
labs(x = "",
y = "",
fill = "Number of taxa"
) +
scale_x_discrete(position = "top") +
theme_light() +
theme(panel.border = element_blank(), axis.text.x = element_text(angle = 90, hjust = 0),
legend.position = "right", text = element_text(size = 13),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # remove background
)
p_heat
Population size data (Has Nc or Ne? what type of Nc?)
Transform NAs to insuff_data_species, since effectively they are the
same:
# check NAs
summary(as.factor(metadata$popsize_data))
## data_for_species insuff_data_species yes NA's
## 130 216 613 7
# Replace NA to insuff_data_species
metadata <-metadata %>%
mutate(popsize_data=replace_na(popsize_data, "insuff_data_species"))
# check
summary(as.factor(metadata$popsize_data))
## data_for_species insuff_data_species yes
## 130 223 613
Re-order popsize_data to have inssuficient data at the end in the
plots
metadata$popsize_data<-factor(metadata$popsize_data,
levels= c("insuff_data_species", "data_for_species","yes"))
Supplementary Figure S5: Population size data availability by
country
Countries have population size data (Nc or Ne) regardless of the
taxonomic group. The last panel includes the entire dataset:
## Duplicate data with an additional column "facet"
df<-CreateAllFacet(metadata, "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA", "all"))
# Plot
ggplot(df, aes(x=taxonomic_group, fill=popsize_data)) +
geom_bar(stat = "count", color="white") +
coord_flip() +
facet_wrap(~facet, ncol = 5, scales="free_x") +
scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
breaks=c("yes", "data_for_species", "insuff_data_species"),
labels=c("Population level", "Species or subspecies level", "Insufficient data")) +
labs(fill="Population size data availability",
x="",
y="Number of taxa (including records of taxa assessed more than once)") +
theme_light() +
theme(panel.border = element_blank(), legend.position="top")
ggsave("FigS5.pdf", width = 31, height = 25 , units = "cm")
Population size data availability in the entire dataset, by taxon
group:
ggplot(metadata, aes(x=taxonomic_group, fill=popsize_data)) +
geom_bar(stat = "count", color="white") +
coord_flip() +
scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
breaks=c("yes", "data_for_species", "insuff_data_species"),
labels=c("Population level", "Species or subspecies level", "Insufficient data")) +
labs(fill="Population size data availability",
x="",
y="Number of taxa") +
theme_light() +
theme(legend.position="right")
Population size data availability in the entire dataset, by taxon
country:
p.popsize<-ggplot(metadata, aes(x=country_assessment, fill=popsize_data)) +
geom_bar(stat = "count", color="white") +
coord_flip() +
scale_x_discrete(limits=rev) +
scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
breaks=c("yes", "data_for_species", "insuff_data_species"),
labels=c("Population level", "Species or subspecies level", "Insufficient data")) +
labs(fill="Population size data availability",
x="",
y="Number of taxa") +
theme_light() +
theme(text = element_text(size = 13), panel.border = element_blank(), legend.position="right")
p.popsize
Species level yes/no table with percentages
#total n
nrow(indicators_full)
## [1] 966
# table
df<- indicators_full %>%
group_by(popsize_data) %>%
summarise(n=n(),
percentage = (n / nrow(metadata)) * 100)
kable(df, digits = 0)
data_for_species |
130 |
13 |
insuff_data_species |
216 |
22 |
yes |
613 |
63 |
NA |
7 |
1 |
Ne data yes or not? & Type of Nc data
Ne available by taxa? (species level)
p1<- metadata %>%
filter(!is.na(ne_pops_exists)) %>%
filter(ne_pops_exists!="other_genetic_info") %>%
ggplot(aes(x=country_assessment, fill=ne_pops_exists)) +
geom_bar(color="white") +
scale_fill_manual(labels=c("No", "Yes"),
breaks=c("no_genetic_data", "ne_available"),
values=c("#ff7f0e", "#2ca02c")) +
xlab("") +
ylab("Number of taxa") +
labs(fill="Ne available \n(from genetic data)") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p1
Nc data available by taxa? (species level)
p2<-metadata %>%
filter(!is.na(nc_pops_exists)) %>%
ggplot(aes(x=country_assessment, fill=nc_pops_exists)) +
geom_bar(color="white") +
scale_fill_manual(labels=c("No", "Yes"),
breaks=c("no", "yes"),
values=c("#ff7f0e", "#2ca02c")) +
labs(fill="Nc available") +
xlab("") +
ylab("Number of taxa") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p2
What kind of Nc data? (dodge bars) This is at population level.
ind1_data %>%
filter(!is.na(NcType)) %>%
ggplot(aes(x=country_assessment, fill=NcType))+
geom_bar(position = "dodge", color="white") +
scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
breaks=c("Nc_point", "Nc_range", "unknown"),
values=c("#0072B2", "#E69F00", "grey80")) +
xlab("") +
ylab("Number of populations") +
labs(fill="Type of Nc data \nby population") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
What kind of Nc data? (fill bars). This is at population level.
p3<-ind1_data %>%
filter(!is.na(NcType)) %>%
ggplot(aes(x=country_assessment, fill=NcType))+
geom_bar(position = "fill", color="white") +
scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
breaks=c("Nc_point", "Nc_range", "unknown"),
values=c("#0072B2", "#E69F00", "grey80")) +
xlab("") +
ylab("Proportion of populations") +
labs(fill="Type of Nc data \nby population") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p3
Data availability at the population level cosidering Ne and Nc
combined. This plot shows where data came from for the Ne value used for
estimating the indicator.
p4<-ind1_data %>%
# NA as a category
mutate(Ne_calculated_from = replace_na(Ne_calculated_from, "NA")) %>%
# reorder to have NA at the end (here at the start because we will use coord_flip below)
mutate(Ne_calculated_from = factor(Ne_calculated_from, levels=c("NA", "NcRange ratio", "NcPoint ratio", "genetic data"))) %>%
ggplot(aes(x=country_assessment, fill=Ne_calculated_from))+
geom_bar(position = "fill", color="white") +
scale_fill_manual(labels=c("Genetic data", "Nc ratio (point)", "Nc ratio (range or qualitative)", "Missing data"),
breaks=c("genetic data", "NcPoint ratio", "NcRange ratio", "NA"),
values=c("darkgreen", "#3388CC", "#E69F00", "grey80")) +
xlab("") +
scale_x_discrete(limits=rev) +
ylab("Percentage of populations") +
scale_y_continuous(labels = scales::percent) + # show axis in % instead of decimal
labs(fill="Data used to estimate Ne") +
theme_light() +
coord_flip() +
theme(text = element_text(size = 13), legend.position = "bottom", panel.border = element_blank())
p4
Range of values for Ne and Nc data
Range of Ne values by taxonomic group, without possible outliers (Ne
> 100000)
ind1_data %>%
filter(Ne < 100000) %>%
filter(!is.na(Ne)) %>%
ggplot(aes(x=taxonomic_group, y=Ne)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))
Check outliers
ind1_data %>%
filter(Ne > 100000) %>%
select(country_assessment, name_assessor, taxon, taxonomic_group, Ne, NeLower, NeUpper, multiassessment, population)
Range of Nc values (actual data point provided) by taxonomic group.
Without possible outliers.
ind1_data %>%
filter(!is.na(NcPoint)) %>%
filter(NcPoint < 10000000) %>%
ggplot(aes(x=taxonomic_group, y=NcPoint)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))
Check outliers
ind1_data %>%
filter(NcPoint > 10000000) %>%
select(country_assessment, name_assessor, taxon, taxonomic_group, population, NcPoint, NcLower, NcUpper, multiassessment, population)
Range of Ne values by taxonomic group from different sources. Without
possible outliers.
ind1_data %>%
filter(!is.na(Ne_combined)) %>%
filter(Ne < 100000) %>%
ggplot(aes(x=taxonomic_group, y=Ne_combined)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))
Range of Ne values by taxonomic group from different sources. Zoom to
Ne < 10,000
ind1_data %>%
filter(!is.na(Ne_combined)) %>%
filter(Ne < 10000) %>%
ggplot(aes(x=taxonomic_group, y=Ne_combined)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))
Number of populations with Ne < 50 from genetic or NcPoint
data
The more worrying threshold, where the effects of inbreeding become
more pronounced in the short-term (more immediate risk of extinction),
occurs at Ne < 50. We cannot estimate how many populations are below
this threshold from all our data, since the NcRange variable includes
qualitative range data (eg. “less 5000 by much”) that we can translate
into more/less than Ne 500, but not Ne 50.
Therefore to count how many populations are Ne < 50 we first
selected the populations with Ne or NcPoint data:
forNe50<- ind1_data %>%
# keep only rows without missing data in Ne or NcPoint
filter(!is.na(Ne) | !is.na(NcPoint)) %>%
# keep more relevant variables:
select(country_assessment, taxon, Ne, MethodNe, NcPoint, NcRange, Ne_from_Nc, Ne_calculated_from, Ne_combined)
forNe50
Total number of populations with Ne or Nc_point data:
nrow(forNe50)
## [1] 1615
We then can count how many of them are below Ne 50:
# select pops Ne_combined (coming from either Ne genetic or NePoint converted with the ratio)
Neless50<- forNe50 %>% filter(Ne_combined < 50)
# how many?
nrow(Neless50)
## [1] 919
# % of the the total number of pops with Ne or Nc_point data:
round(nrow(Neless50) / nrow(forNe50) * 100)
## [1] 57
Check by origin of the Nc data
Neless50 %>% ggplot(aes(x=Ne_combined, fill=Ne_calculated_from)) +
geom_histogram(alpha=0.6, position = 'identity') + theme_light()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Missing data on extant and extinct populations
We have NA in Proportion of maintained populations (indicator)
because in some cases the number of extinct populations is unknown,
therefore the operation cannot be computed.
Counts
Total records with NA in extant populations:
sum(is.na(indicators_full$n_extant_populations))
## [1] 19
Taxa with NA in extant populations:
indicators_full %>%
filter(is.na(n_extant_populations)) %>%
select(country_assessment, taxonomic_group, taxon, n_extant_populations, n_extint_populations)
Total taxa with NA in extinct populations:
sum(is.na(indicators_full$n_extint_populations))
## [1] 416
Do taxa with NA for extant also have NA for extinct?
indicators_full$taxon[is.na(indicators_full$n_extant_populations)] %in% indicators_full$taxon[is.na(indicators_full$n_extint_populations)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE
So out of the 966, we have 416 records with NA in
n_extinct and 19 records with NA in n_extant.
Of them, 19 have NA in both n_extant and n_extinct.
Plot missing data extinct populations
p5<-indicators_full %>%
ggplot(aes(x=country_assessment, fill=!is.na(n_extint_populations))) + # Notice this shows which taxa DO NOT have NA, ie exctint pops are KNOWN
geom_bar(color="white") +
scale_fill_manual(labels=c("Number of populations known", "Number of populations unknown"),
breaks =c("TRUE", "FALSE"),
values=c("#2ca02c", "#ff7f0e")) +
labs(fill="Data availability on extinct \npopulations") +
xlab("") + ylab("Number of taxa") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p5
Supplementary Figure S3: Missing data in extinct populations by
country and method and by taxonomic group
Missing data in number of extinct populations by method to define
populations:
pa<-indicators_full %>%
ggplot(aes(x=defined_populations_nicenames, fill=!is.na(n_extint_populations))) +
geom_bar(color="white") +
coord_flip()+
scale_fill_manual(labels=c("Number of populations \nknown", "Number of populations \nunknown"),
breaks =c("TRUE", "FALSE"),
values=c("#2ca02c", "#ff7f0e")) +
labs(fill="Data availability on \nextinct populations") +
xlab("") + ylab("Number of taxa") +
facet_wrap(country_assessment ~., nrow = 3, scales="free_x") +
theme_light() +
theme(panel.border = element_blank(), legend.position="top")
pa
Missing data in number of extinct populations by method to define
populations:
pb<-indicators_full %>%
ggplot(aes(x=taxonomic_group, fill=!is.na(n_extint_populations)))+
geom_bar(color="white") +
coord_flip()+
scale_fill_manual(labels=c("Number of populations \nknown", "Number of populations \nunknown"),
breaks =c("TRUE", "FALSE"),
values=c("#2ca02c", "#ff7f0e")) +
labs(fill="Data availability on \nextinct populations") +
xlab("") + ylab("Number of taxa") +
facet_wrap(country_assessment ~., nrow = 3, scales="free_x") +
theme_light() +
theme(panel.border = element_blank(), legend.position="none")
pb
Single figure:
plot_grid(pa, pb,
align = "v", ncol=1,
rel_heights = c(1.1, 1),
labels=c("A)", "B)"))
ggsave("FigS3.pdf", width = 31, height = 50 , units = "cm")
Main Figure 1: Taxa by country and data availability to estimate Ne
indciator (origion of data to estimate Ne) and PM indicator (missing
data on pop extinction):
Distribution of Nc, Ne and types of Ne in a single figure with 4
panels, using count for a, b & c, and proportions for d:
# plot
plot_grid(p_heat + theme(legend.position = "right", legend.justification = c(0,.5)), # legend.justification aligns legends
p5 + coord_flip() +
scale_x_discrete(limits=rev) +
theme(legend.position = "right", legend.justification = c(0,.5)),
p.popsize + theme(legend.position = "right", legend.justification = c(0,.5)),
p4 + theme(legend.position = "right", legend.justification = c(0,.5)),
ncol = 1, labels = c("A)", "B)", "C)", "D)"), align = "v",
rel_heights = c(1.3, 1, 1,1))
ggsave("Fig1.pdf", width = 17, height = 33 , units = "cm")
Main Figure 2: Method to define populations used by country and taxa
(alluvial)
Reformat data
# reformat data
foralluvial<-metadata %>% group_by(country_assessment, defined_populations_nicenames, taxonomic_group) %>%
summarise(n=n())
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_nicenames'. You can override using the `.groups` argument.
# define colors
my_cols<- simplifiedmethods_colors
# we need a vector of colors by country for each row of the dataset, so:
methodspop<-as.factor(foralluvial$defined_populations_nicenames)
levels(methodspop)<-my_cols
methodspop<-as.vector(methodspop)
head(methodspop)
## [1] "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#45c097"
Plot
alluvial(foralluvial[,1:3], freq = foralluvial$n,
col=methodspop,
blocks=FALSE,
gap.width = 0.5,
cex=.8,
xw = 0.1,
cw = 0.2,
border = NA,
alpha = .7)
pdf("Fig2.pdf", width = 9, height = 7)
# plot
alluvial(foralluvial[,1:3], freq = foralluvial$n,
col=methodspop,
blocks=FALSE,
gap.width = 0.5,
cex=.8,
xw = 0.1,
cw = 0.2,
border = NA,
alpha = .7)
dev.off()
## quartz_off_screen
## 2
Get count to add numbers outside R:
# n for country
metadata %>% group_by(country_assessment) %>%
summarise(n=n())
# n for taxonomic group
metadata %>% group_by(taxonomic_group) %>%
summarise(n=n())
Exploratory plots for the association of distribution range
(restricted vs wide) on the indicators
All the following plots and analyses consider the average of
multiassessed species (variable _mean
), so that they are
shown only once.
To have nicer looking plots, change “wide_ranging” for “wide
ranging”:
indicators_averaged_one$species_range<-gsub("wide_ranging", "wide ranging", indicators_averaged_one$species_range)
Indicator 1 (Ne>500)
Plot Indicator 1 by type of range in the entire dataset. Filtering NA
in species range:
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
p1<-indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator1_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
coord_flip() +
scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
labels=c("wide ranging", "restricted", "unknown"),
values=c("#00BFC4", "#F8766D", "grey80")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p1
Plot Ne Indicator by country and type of range. Remove “unknown” and
NA for better visualization.
### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA", "all"))
## plot
df %>%
# filter out "unknown" range
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=species_range, y=indicator1_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
coord_flip() +
scale_x_discrete(breaks=c("wide ranging", "restricted"),
labels=c("wide ranging", "restricted")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
facet_wrap(~facet, ncol = 5) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 658 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 658 rows containing missing values (`geom_point()`).
Indicator 2 (mantained populations)
Plot Indicator 2 by type of range in the entire dataset. Filtering NA
in species range:
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
p2<-indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator2_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
coord_flip() +
scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
labels=c("wide ranging", "restricted", "unknown"),
values=c("#00BFC4", "#F8766D", "grey80")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p2
Plot Indicator 2 by country and type of range. We remove NA and
unknown for better visualization.
### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "South Africa", "Sweden", "USA", "all"))
## plot
df %>%
# filter out "unknown" range
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=species_range, y=indicator2_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
facet_wrap(~facet, ncol = 5) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 742 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 742 rows containing missing values (`geom_point()`).
Single plot PM and Ne indicators by range type
plot_grid(p1, p2, ncol=1, align = "v", labels=c("a)", "b)"))
Sampling size of different variables useful to interpret the
statistical models run below
The plots and tables below are meant to visualize the sampling size
and data distribution of some of the variables used in the models below.
Data is a subset filtering outliers (>500 populations) and using the
simplified methods (see above). Multiassessed species are considered
independently (each assessment is a data point).
Number of maintained populations by country and method
Number of maintained populations by country and method is useful to
interpret the models that would be run below.
indicators_full %>%
filter(n_extant_populations<500) %>% # filter outliers
# order countries vertically by similar number of pops
mutate(country_assessment = factor(country_assessment,
levels=c("Colombia", "Australia", "Belgium",
"Mexico", "France", "USA",
"South Africa", "Japan", "Sweden"))) %>%
ggplot(aes(x=defined_populations_nicenames, y=n_extant_populations,
fill=defined_populations_nicenames, color=defined_populations_nicenames)) +
geom_boxplot() +
geom_jitter(size=.3, width = 0.1, color="black") +
coord_flip() +
facet_wrap(country_assessment ~ ., nrow=3, scales="free_x") +
xlab("") +
ylab("Number of maintained populations") +
scale_fill_manual(values=alpha(simplifiedmethods_colors, .3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
text = element_text(size = 15))
Ne values
ind1_data %>%
filter(Ne_combined < 100000) %>% # filter outliers
ggplot(aes(x=defined_populations_simplified, y=Ne_combined,
color=Ne_calculated_from)) +
geom_boxplot(position = "dodge") +
geom_jitter(position = position_dodge(width = 0.75)) +
facet_wrap(country_assessment ~ ., nrow=3) +
coord_flip() +
theme_light()
Zoom to Ne 500
ind1_data %>%
filter(Ne_combined < 100000) %>% # filter outliers
ggplot(aes(x=defined_populations_simplified, y=Ne_combined,
color=Ne_calculated_from)) +
ylim(0,2000)+
geom_boxplot(position = "dodge") +
geom_jitter(position = position_dodge(width = 0.75)) +
facet_wrap(country_assessment ~ ., nrow=3) +
coord_flip() +
theme_light()
## Warning: Removed 104 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 104 rows containing missing values (`geom_point()`).
Summary table for sampling size by method and source of Ne:
x<- ind1_data %>%
filter(!is.na(Ne_calculated_from)) %>%
group_by(defined_populations_simplified, Ne_calculated_from) %>%
summarise(n=n())
## `summarise()` has grouped output by 'defined_populations_simplified'. You can
## override using the `.groups` argument.
kable(x)
dispersal_buffer |
genetic data |
10 |
dispersal_buffer |
NcPoint ratio |
226 |
dispersal_buffer |
NcRange ratio |
1114 |
eco_biogeo_proxies |
genetic data |
8 |
eco_biogeo_proxies |
NcPoint ratio |
55 |
eco_biogeo_proxies |
NcRange ratio |
82 |
genetic_clusters |
genetic data |
43 |
genetic_clusters |
NcPoint ratio |
32 |
genetic_clusters |
NcRange ratio |
59 |
genetic_clusters eco_biogeo_proxies |
genetic data |
4 |
genetic_clusters eco_biogeo_proxies |
NcPoint ratio |
3 |
genetic_clusters eco_biogeo_proxies |
NcRange ratio |
18 |
genetic_clusters geographic_boundaries |
genetic data |
44 |
genetic_clusters geographic_boundaries |
NcPoint ratio |
34 |
genetic_clusters geographic_boundaries |
NcRange ratio |
83 |
geographic_boundaries |
genetic data |
142 |
geographic_boundaries |
NcPoint ratio |
478 |
geographic_boundaries |
NcRange ratio |
594 |
geographic_boundaries eco_biogeo_proxies |
genetic data |
8 |
geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
68 |
geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
200 |
geographic_boundaries management_units |
genetic data |
29 |
geographic_boundaries management_units |
NcPoint ratio |
189 |
geographic_boundaries management_units |
NcRange ratio |
22 |
management_units |
NcPoint ratio |
48 |
management_units |
NcRange ratio |
76 |
other |
NcPoint ratio |
3 |
other |
NcRange ratio |
14 |
other_combinations |
genetic data |
61 |
other_combinations |
NcPoint ratio |
130 |
other_combinations |
NcRange ratio |
712 |
Same as above but adding country:
x<- ind1_data %>%
filter(!is.na(Ne_calculated_from)) %>%
group_by(country_assessment, defined_populations_simplified, Ne_calculated_from) %>%
summarise(n=n())
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_simplified'. You can override using the `.groups`
## argument.
kable(x)
Australia |
genetic_clusters |
genetic data |
7 |
Australia |
genetic_clusters |
NcPoint ratio |
8 |
Australia |
genetic_clusters geographic_boundaries |
genetic data |
15 |
Australia |
genetic_clusters geographic_boundaries |
NcPoint ratio |
13 |
Australia |
genetic_clusters geographic_boundaries |
NcRange ratio |
7 |
Australia |
geographic_boundaries |
genetic data |
15 |
Australia |
geographic_boundaries |
NcPoint ratio |
76 |
Australia |
geographic_boundaries |
NcRange ratio |
59 |
Australia |
geographic_boundaries management_units |
NcPoint ratio |
8 |
Australia |
geographic_boundaries management_units |
NcRange ratio |
3 |
Australia |
management_units |
NcRange ratio |
3 |
Australia |
other_combinations |
NcRange ratio |
4 |
Belgium |
dispersal_buffer |
genetic data |
10 |
Belgium |
dispersal_buffer |
NcPoint ratio |
8 |
Belgium |
dispersal_buffer |
NcRange ratio |
844 |
Belgium |
genetic_clusters |
NcRange ratio |
7 |
Belgium |
other_combinations |
genetic data |
40 |
Belgium |
other_combinations |
NcPoint ratio |
2 |
Belgium |
other_combinations |
NcRange ratio |
379 |
Colombia |
geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
4 |
Colombia |
geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
46 |
Colombia |
other_combinations |
NcRange ratio |
1 |
France |
eco_biogeo_proxies |
genetic data |
7 |
France |
genetic_clusters |
genetic data |
3 |
France |
genetic_clusters |
NcRange ratio |
1 |
France |
genetic_clusters eco_biogeo_proxies |
genetic data |
3 |
France |
genetic_clusters eco_biogeo_proxies |
NcPoint ratio |
1 |
France |
genetic_clusters geographic_boundaries |
genetic data |
6 |
France |
genetic_clusters geographic_boundaries |
NcPoint ratio |
6 |
France |
genetic_clusters geographic_boundaries |
NcRange ratio |
7 |
France |
geographic_boundaries |
NcPoint ratio |
12 |
France |
geographic_boundaries |
NcRange ratio |
22 |
France |
geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
1 |
France |
geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
2 |
France |
geographic_boundaries management_units |
genetic data |
15 |
France |
geographic_boundaries management_units |
NcPoint ratio |
38 |
France |
geographic_boundaries management_units |
NcRange ratio |
12 |
France |
management_units |
NcPoint ratio |
10 |
France |
management_units |
NcRange ratio |
8 |
France |
other_combinations |
genetic data |
3 |
France |
other_combinations |
NcPoint ratio |
20 |
France |
other_combinations |
NcRange ratio |
10 |
Japan |
dispersal_buffer |
NcPoint ratio |
214 |
Japan |
dispersal_buffer |
NcRange ratio |
232 |
Japan |
geographic_boundaries |
NcPoint ratio |
1 |
Japan |
geographic_boundaries |
NcRange ratio |
5 |
Mexico |
genetic_clusters |
genetic data |
13 |
Mexico |
genetic_clusters |
NcPoint ratio |
15 |
Mexico |
genetic_clusters |
NcRange ratio |
24 |
Mexico |
genetic_clusters eco_biogeo_proxies |
genetic data |
1 |
Mexico |
genetic_clusters eco_biogeo_proxies |
NcRange ratio |
17 |
Mexico |
genetic_clusters geographic_boundaries |
genetic data |
2 |
Mexico |
genetic_clusters geographic_boundaries |
NcPoint ratio |
6 |
Mexico |
genetic_clusters geographic_boundaries |
NcRange ratio |
15 |
Mexico |
geographic_boundaries |
NcRange ratio |
75 |
Mexico |
other |
NcRange ratio |
1 |
Mexico |
other_combinations |
genetic data |
4 |
Mexico |
other_combinations |
NcRange ratio |
26 |
South Africa |
genetic_clusters |
genetic data |
12 |
South Africa |
genetic_clusters |
NcPoint ratio |
3 |
South Africa |
genetic_clusters |
NcRange ratio |
6 |
South Africa |
genetic_clusters eco_biogeo_proxies |
NcPoint ratio |
2 |
South Africa |
genetic_clusters eco_biogeo_proxies |
NcRange ratio |
1 |
South Africa |
genetic_clusters geographic_boundaries |
genetic data |
2 |
South Africa |
genetic_clusters geographic_boundaries |
NcPoint ratio |
2 |
South Africa |
genetic_clusters geographic_boundaries |
NcRange ratio |
11 |
South Africa |
geographic_boundaries |
genetic data |
2 |
South Africa |
geographic_boundaries |
NcPoint ratio |
28 |
South Africa |
geographic_boundaries |
NcRange ratio |
21 |
South Africa |
geographic_boundaries management_units |
NcRange ratio |
1 |
South Africa |
management_units |
NcPoint ratio |
1 |
South Africa |
other |
NcRange ratio |
1 |
South Africa |
other_combinations |
genetic data |
2 |
South Africa |
other_combinations |
NcPoint ratio |
8 |
South Africa |
other_combinations |
NcRange ratio |
4 |
Sweden |
dispersal_buffer |
NcPoint ratio |
4 |
Sweden |
dispersal_buffer |
NcRange ratio |
38 |
Sweden |
eco_biogeo_proxies |
NcRange ratio |
26 |
Sweden |
genetic_clusters |
genetic data |
7 |
Sweden |
genetic_clusters |
NcPoint ratio |
3 |
Sweden |
genetic_clusters |
NcRange ratio |
11 |
Sweden |
genetic_clusters geographic_boundaries |
genetic data |
19 |
Sweden |
genetic_clusters geographic_boundaries |
NcPoint ratio |
6 |
Sweden |
genetic_clusters geographic_boundaries |
NcRange ratio |
41 |
Sweden |
geographic_boundaries |
genetic data |
2 |
Sweden |
geographic_boundaries |
NcPoint ratio |
67 |
Sweden |
geographic_boundaries |
NcRange ratio |
168 |
Sweden |
geographic_boundaries management_units |
NcPoint ratio |
3 |
Sweden |
geographic_boundaries management_units |
NcRange ratio |
5 |
Sweden |
management_units |
NcPoint ratio |
12 |
Sweden |
other |
NcRange ratio |
10 |
Sweden |
other_combinations |
genetic data |
3 |
Sweden |
other_combinations |
NcPoint ratio |
7 |
Sweden |
other_combinations |
NcRange ratio |
265 |
USA |
eco_biogeo_proxies |
genetic data |
1 |
USA |
eco_biogeo_proxies |
NcPoint ratio |
55 |
USA |
eco_biogeo_proxies |
NcRange ratio |
56 |
USA |
genetic_clusters |
genetic data |
1 |
USA |
genetic_clusters |
NcPoint ratio |
3 |
USA |
genetic_clusters |
NcRange ratio |
10 |
USA |
genetic_clusters geographic_boundaries |
NcPoint ratio |
1 |
USA |
genetic_clusters geographic_boundaries |
NcRange ratio |
2 |
USA |
geographic_boundaries |
genetic data |
123 |
USA |
geographic_boundaries |
NcPoint ratio |
294 |
USA |
geographic_boundaries |
NcRange ratio |
244 |
USA |
geographic_boundaries eco_biogeo_proxies |
genetic data |
8 |
USA |
geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
63 |
USA |
geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
152 |
USA |
geographic_boundaries management_units |
genetic data |
14 |
USA |
geographic_boundaries management_units |
NcPoint ratio |
140 |
USA |
geographic_boundaries management_units |
NcRange ratio |
1 |
USA |
management_units |
NcPoint ratio |
25 |
USA |
management_units |
NcRange ratio |
65 |
USA |
other |
NcPoint ratio |
3 |
USA |
other |
NcRange ratio |
2 |
USA |
other_combinations |
genetic data |
9 |
USA |
other_combinations |
NcPoint ratio |
93 |
USA |
other_combinations |
NcRange ratio |
23 |
Distribution type (wide / restricted)
x<-indicators_full %>% filter(species_range !="unknown") %>%
group_by(defined_populations_nicenames, species_range) %>%
summarise(n=n())
## `summarise()` has grouped output by 'defined_populations_nicenames'. You can
## override using the `.groups` argument.
kable(x)
dispersal buffer |
restricted |
54 |
dispersal buffer |
wide_ranging |
99 |
eco- biogeographic proxies |
restricted |
24 |
eco- biogeographic proxies |
wide_ranging |
16 |
genetic clusters |
restricted |
46 |
genetic clusters |
wide_ranging |
59 |
genetic clusters & eco- biogeographic proxies |
restricted |
8 |
genetic clusters & eco- biogeographic proxies |
wide_ranging |
12 |
genetic clusters & geographic boundaries |
restricted |
34 |
genetic clusters & geographic boundaries |
wide_ranging |
34 |
geographic boundaries |
restricted |
202 |
geographic boundaries |
wide_ranging |
65 |
geographic boundaries & eco- biogeographic
proxies |
restricted |
74 |
geographic boundaries & eco- biogeographic
proxies |
wide_ranging |
17 |
geographic boundaries & management units |
restricted |
14 |
geographic boundaries & management units |
wide_ranging |
10 |
management units |
restricted |
14 |
management units |
wide_ranging |
13 |
other |
restricted |
13 |
other |
wide_ranging |
4 |
other combinations |
restricted |
45 |
other combinations |
wide_ranging |
64 |
Statistical models: test for associations between method used to
define populations / range type on the number of populations and the
indicator values
The analyses and plots below us a subset of data filtering outliers
(>500 populations) and using the simplified methods (see above).
Multiassessed species are considered independently (each assessment is a
data point).
(a) Does the number of maintained pops vary with method used?
First we tested whether the different methods reported in this study
were associated with varying numbers of populations obtained. For this
analysis, we also controlled for range type, as we expect species with
wider ranges to plausibly have more populations than species with
narrower ranges.
Plot number of populations by method.
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
# plot for number of pops
pa<- df %>%
ggplot(aes(x=myaxis, y=n_extant_populations, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Number of maintained populations") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
pa
Prepare data for model (remove outliers, “unknown” category and NA in
desired variable) and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>% # we remove "unknonw" because its n is too low, thus unbalancing the model
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 149
## eco_biogeo_proxies
## 40
## genetic_clusters
## 103
## genetic_clusters eco_biogeo_proxies
## 19
## genetic_clusters geographic_boundaries
## 68
## geographic_boundaries
## 263
## geographic_boundaries eco_biogeo_proxies
## 90
## geographic_boundaries management_units
## 24
## management_units
## 27
## other
## 14
## other_combinations
## 106
# total n
nrow(data_for_model)
## [1] 903
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
# make sure specis range is a factor
data_for_model$species_range<-as.factor(data_for_model$species_range)
Run model asking: Does the number of maintained pops vary with method
and range?
m.a1<-glmer(data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified + data_for_model$species_range + (1|data_for_model$country_assessment), family ="poisson")
summary(m.a1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +
## data_for_model$species_range + (1 | data_for_model$country_assessment)
##
## AIC BIC logLik deviance df.resid
## 24348.0 24410.4 -12161.0 24322.0 890
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.343 -2.886 -1.068 0.556 84.303
##
## Random effects:
## Groups Name Variance Std.Dev.
## data_for_model$country_assessment (Intercept) 0.931 0.9649
## Number of obs: 903, groups: data_for_model$country_assessment, 9
##
## Fixed effects:
## Estimate
## (Intercept) 1.940970
## data_for_model$defined_populations_simplifieddispersal_buffer -1.160563
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.059980
## data_for_model$defined_populations_simplifiedgenetic_clusters -1.415226
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -1.291565
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.159258
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.012621
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units -0.006828
## data_for_model$defined_populations_simplifiedmanagement_units -0.662463
## data_for_model$defined_populations_simplifiedother -1.172203
## data_for_model$defined_populations_simplifiedother_combinations -0.645222
## data_for_model$species_rangewide_ranging 0.982729
## Std. Error
## (Intercept) 0.322581
## data_for_model$defined_populations_simplifieddispersal_buffer 0.051124
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.034284
## data_for_model$defined_populations_simplifiedgenetic_clusters 0.063791
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.094117
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.036018
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.040706
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.051351
## data_for_model$defined_populations_simplifiedmanagement_units 0.055633
## data_for_model$defined_populations_simplifiedother 0.111661
## data_for_model$defined_populations_simplifiedother_combinations 0.035778
## data_for_model$species_rangewide_ranging 0.020473
## z value
## (Intercept) 6.017
## data_for_model$defined_populations_simplifieddispersal_buffer -22.701
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 1.750
## data_for_model$defined_populations_simplifiedgenetic_clusters -22.185
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -13.723
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 4.422
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.310
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units -0.133
## data_for_model$defined_populations_simplifiedmanagement_units -11.908
## data_for_model$defined_populations_simplifiedother -10.498
## data_for_model$defined_populations_simplifiedother_combinations -18.034
## data_for_model$species_rangewide_ranging 48.002
## Pr(>|z|)
## (Intercept) 1.78e-09
## data_for_model$defined_populations_simplifieddispersal_buffer < 2e-16
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.0802
## data_for_model$defined_populations_simplifiedgenetic_clusters < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 9.80e-06
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.7565
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.8942
## data_for_model$defined_populations_simplifiedmanagement_units < 2e-16
## data_for_model$defined_populations_simplifiedother < 2e-16
## data_for_model$defined_populations_simplifiedother_combinations < 2e-16
## data_for_model$species_rangewide_ranging < 2e-16
##
## (Intercept) ***
## data_for_model$defined_populations_simplifieddispersal_buffer ***
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies .
## data_for_model$defined_populations_simplifiedgenetic_clusters ***
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units
## data_for_model$defined_populations_simplifiedmanagement_units ***
## data_for_model$defined_populations_simplifiedother ***
## data_for_model$defined_populations_simplifiedother_combinations ***
## data_for_model$species_rangewide_ranging ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_mdl$dfnd_ppltns_smplfdd_ -0.036
## dt_fr_$____ -0.015 0.118
## dt_fr_mdl$dfnd_ppltns_smplfdg_ -0.016 0.108
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ -0.006 0.050
## dt_f_$___g_ -0.024 0.135
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ -0.022 0.096
## dt_f_$___m_ -0.014 0.071
## dt_fr_mdl$dfnd_ppltns_smplfdm_ -0.008 0.081
## dt_fr_mdl$d__ -0.006 0.039
## dt_fr_mdl$dfnd_ppltns_smplfdt_ -0.029 0.437
## dt_fr_mdl$s__ -0.026 -0.111
## d__$____ dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_ 0.113
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ 0.108 0.057
## dt_f_$___g_ 0.194 0.168
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.270 0.093
## dt_f_$___m_ 0.158 0.079
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.187 0.078
## dt_fr_mdl$d__ 0.077 0.036
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.217 0.148
## dt_fr_mdl$s__ -0.151 -0.095
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ d__$_g
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_ 0.088
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.095 0.163
## dt_f_$___m_ 0.057 0.132
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.069 0.130
## dt_fr_mdl$d__ 0.028 0.064
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.091 0.224
## dt_fr_mdl$s__ -0.079 -0.109
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ d__$_m
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_ 0.141
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.167 0.099
## dt_fr_mdl$d__ 0.065 0.043
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.187 0.130
## dt_fr_mdl$s__ -0.146 -0.030
## dt_fr_mdl$dfnd_ppltns_smplfdm_ dt_fr_mdl$d__
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_
## dt_fr_mdl$dfnd_ppltns_smplfdm_
## dt_fr_mdl$d__ 0.047
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.144 0.065
## dt_fr_mdl$s__ -0.151 -0.023
## dt_fr_mdl$dfnd_ppltns_smplfdt_
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_
## dt_fr_mdl$dfnd_ppltns_smplfdm_
## dt_fr_mdl$d__
## dt_fr_mdl$dfnd_ppltns_smplfdt_
## dt_fr_mdl$s__ -0.146
Considering the role of method was so important for determining the
number of populations, we also tested whether this effect remained after
removing “wide-ranging” from the model. The objective here was to test
whether method alone would also produce varying numbers of populations,
for example if species rangedness were unknown.
Does the number of maintained pops vary with method used? (does
method still influence number of populations if we exclude range type
from the model):
m.a2<-glmer(data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +
(1|data_for_model$country_assessment), family ="poisson")
See results:
summary(m.a2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +
## (1 | data_for_model$country_assessment)
##
## AIC BIC logLik deviance df.resid
## 26681.3 26739.0 -13328.7 26657.3 891
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.006 -2.914 -1.304 0.239 74.870
##
## Random effects:
## Groups Name Variance Std.Dev.
## data_for_model$country_assessment (Intercept) 1.055 1.027
## Number of obs: 903, groups: data_for_model$country_assessment, 9
##
## Fixed effects:
## Estimate
## (Intercept) 2.24642
## data_for_model$defined_populations_simplifieddispersal_buffer -0.87138
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.30408
## data_for_model$defined_populations_simplifiedgenetic_clusters -1.11830
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -0.92439
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.34580
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.27544
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.06671
## data_for_model$defined_populations_simplifiedmanagement_units -0.24310
## data_for_model$defined_populations_simplifiedother -1.04429
## data_for_model$defined_populations_simplifiedother_combinations -0.38714
## Std. Error
## (Intercept) 0.34317
## data_for_model$defined_populations_simplifieddispersal_buffer 0.05300
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.03408
## data_for_model$defined_populations_simplifiedgenetic_clusters 0.06348
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.09394
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.03532
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.04060
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.05115
## data_for_model$defined_populations_simplifiedmanagement_units 0.05505
## data_for_model$defined_populations_simplifiedother 0.11175
## data_for_model$defined_populations_simplifiedother_combinations 0.03549
## z value
## (Intercept) 6.546
## data_for_model$defined_populations_simplifieddispersal_buffer -16.442
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 8.922
## data_for_model$defined_populations_simplifiedgenetic_clusters -17.616
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -9.840
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 9.790
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 6.785
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 1.304
## data_for_model$defined_populations_simplifiedmanagement_units -4.416
## data_for_model$defined_populations_simplifiedother -9.345
## data_for_model$defined_populations_simplifiedother_combinations -10.908
## Pr(>|z|)
## (Intercept) 5.91e-11
## data_for_model$defined_populations_simplifieddispersal_buffer < 2e-16
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries < 2e-16
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 1.16e-11
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.192
## data_for_model$defined_populations_simplifiedmanagement_units 1.00e-05
## data_for_model$defined_populations_simplifiedother < 2e-16
## data_for_model$defined_populations_simplifiedother_combinations < 2e-16
##
## (Intercept) ***
## data_for_model$defined_populations_simplifieddispersal_buffer ***
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgenetic_clusters ***
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units
## data_for_model$defined_populations_simplifiedmanagement_units ***
## data_for_model$defined_populations_simplifiedother ***
## data_for_model$defined_populations_simplifiedother_combinations ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_mdl$dfnd_ppltns_smplfdd_ -0.038
## dt_fr_$____ -0.019 0.107
## dt_fr_mdl$dfnd_ppltns_smplfdg_ -0.017 0.093
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ -0.008 0.042
## dt_f_$___g_ -0.024 0.115
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ -0.026 0.086
## dt_f_$___m_ -0.013 0.072
## dt_fr_mdl$dfnd_ppltns_smplfdm_ -0.013 0.068
## dt_fr_md$__ -0.006 0.035
## dt_fr_mdl$dfnd_ppltns_smplfdt_ -0.031 0.432
## d__$____ dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_ 0.104
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ 0.102 0.053
## dt_f_$___g_ 0.200 0.147
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.261 0.085
## dt_f_$___m_ 0.162 0.071
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.171 0.067
## dt_fr_md$__ 0.082 0.037
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.214 0.135
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ d__$_g
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_ 0.084
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.089 0.168
## dt_f_$___m_ 0.057 0.134
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.060 0.127
## dt_fr_md$__ 0.030 0.065
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.086 0.207
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ d__$_m
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_ 0.142
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.150 0.098
## dt_fr_md$__ 0.070 0.047
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.182 0.134
## dt_fr_mdl$dfnd_ppltns_smplfdm_ dt__$__
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_
## dt_fr_mdl$dfnd_ppltns_smplfdm_
## dt_fr_md$__ 0.048
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.133 0.067
Extending from this result, we also tested whether species range
alone is an important predictor of the number of extant populations, as
species range is determined by the geographic spread of the species, but
not necessarily fragmentation
Does the number of maintained pops vary with range?
m.a3<-glmer(n_extant_populations ~ species_range + (1|country_assessment), family = "poisson", data = data_for_model)
summary(m.a3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: n_extant_populations ~ species_range + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 26119.4 26133.8 -13056.7 26113.4 900
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.454 -2.959 -1.215 -0.071 91.960
##
## Random effects:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.7003 0.8368
## Number of obs: 903, groups: country_assessment, 9
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.58170 0.27971 5.655 1.56e-08 ***
## species_rangewide_ranging 0.84563 0.01981 42.688 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## spcs_rngwd_ -0.042
(b) Does the proportion of maintained populations (indicator2) vary
with method used to define populations?
Our next goal was to determine whether study design (i.e. clustering
method to define populations) and/or species-level variables (number of
populations, range type) appropriately were associated with the
measurement of the genetic indicators.
Plot PM indicator by method to define populations:
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot for Proportion of maintained populations (indicator)
pb<- df %>%
filter(n_extant_populations<500) %>%
ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
pb
Plot Scatter plot of indicator2 vs extant pops
psupA<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator2, color=defined_populations_nicenames)) +
geom_point() +
theme_light() +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
theme(legend.position = "none") +
ylab("Proportion of populations maintained within species \n(PM indicator)") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
psupA
psupA.1<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator2, color=species_range)) +
geom_point() +
theme_light() +
theme(legend.position = "none") +
ylab("Proportion of populations maintained within species \n(PM indicator)") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
psupA.1
First we want to test if the Proportion of maintained populations
(indicator 2) vary with method used.
Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 78
## eco_biogeo_proxies
## 30
## genetic_clusters
## 50
## genetic_clusters eco_biogeo_proxies
## 12
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 170
## geographic_boundaries eco_biogeo_proxies
## 41
## geographic_boundaries management_units
## 17
## management_units
## 23
## other
## 9
## other_combinations
## 77
# total n
nrow(data_for_model)
## [1] 548
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
Run model asking: Does Proportion of maintained populations
(indicator 2) vary with method used? Controlling for variation in
indicator2 among countries:
m.b1<-glmmTMB(indicator2 ~ defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = data_for_model)
See results:
summary(m.b1)
## Family: ordbeta ( logit )
## Formula:
## indicator2 ~ defined_populations_simplified + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 667.8 732.4 -318.9 637.8 533
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.332 0.5762
## Number of obs: 548, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.02
##
## Conditional model:
## Estimate
## (Intercept) 0.58166
## defined_populations_simplifieddispersal_buffer 0.25783
## defined_populations_simplifiedeco_biogeo_proxies 0.12833
## defined_populations_simplifiedgenetic_clusters 0.50961
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.45304
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.02774
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.07572
## defined_populations_simplifiedgeographic_boundaries management_units 0.34920
## defined_populations_simplifiedmanagement_units -0.16267
## defined_populations_simplifiedother 0.07798
## defined_populations_simplifiedother_combinations 0.44148
## Std. Error
## (Intercept) 0.22715
## defined_populations_simplifieddispersal_buffer 0.24771
## defined_populations_simplifiedeco_biogeo_proxies 0.21965
## defined_populations_simplifiedgenetic_clusters 0.25953
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.44095
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.21753
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.23547
## defined_populations_simplifiedgeographic_boundaries management_units 0.33807
## defined_populations_simplifiedmanagement_units 0.24894
## defined_populations_simplifiedother 0.51285
## defined_populations_simplifiedother_combinations 0.16872
## z value
## (Intercept) 2.561
## defined_populations_simplifieddispersal_buffer 1.041
## defined_populations_simplifiedeco_biogeo_proxies 0.584
## defined_populations_simplifiedgenetic_clusters 1.964
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.027
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.128
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.322
## defined_populations_simplifiedgeographic_boundaries management_units 1.033
## defined_populations_simplifiedmanagement_units -0.653
## defined_populations_simplifiedother 0.152
## defined_populations_simplifiedother_combinations 2.617
## Pr(>|z|)
## (Intercept) 0.01045
## defined_populations_simplifieddispersal_buffer 0.29795
## defined_populations_simplifiedeco_biogeo_proxies 0.55906
## defined_populations_simplifiedgenetic_clusters 0.04958
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.30423
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.89851
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.74778
## defined_populations_simplifiedgeographic_boundaries management_units 0.30165
## defined_populations_simplifiedmanagement_units 0.51347
## defined_populations_simplifiedother 0.87915
## defined_populations_simplifiedother_combinations 0.00888
##
## (Intercept) *
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Given the preceding relationships detected between method, number of
populations, and species’ range, we investigated associations between
these variables and our indicator values in more detail, to aid in
understanding the underlying mechanisms that were driving the
association between method (especially genetic clusters) and indicator
2. That is, we hypothesised that the relationship between method and
indicator 2 may be an indirect result of the association between method
and number of populations and species range.
First we added number of populations to our model testing the
relationship between method and indicator 2
m.b2<-glmmTMB(indicator2 ~ defined_populations_simplified + n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
summary(m.b2)
## Family: ordbeta ( logit )
## Formula:
## indicator2 ~ defined_populations_simplified + n_extant_populations +
## (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 668.8 737.7 -318.4 636.8 532
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.3308 0.5751
## Number of obs: 548, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.07
##
## Conditional model:
## Estimate
## (Intercept) 0.569872
## defined_populations_simplifieddispersal_buffer 0.262312
## defined_populations_simplifiedeco_biogeo_proxies 0.106217
## defined_populations_simplifiedgenetic_clusters 0.513232
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.466420
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.053513
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.091065
## defined_populations_simplifiedgeographic_boundaries management_units 0.365218
## defined_populations_simplifiedmanagement_units -0.154562
## defined_populations_simplifiedother 0.090016
## defined_populations_simplifiedother_combinations 0.439212
## n_extant_populations 0.001056
## Std. Error
## (Intercept) 0.226915
## defined_populations_simplifieddispersal_buffer 0.246723
## defined_populations_simplifiedeco_biogeo_proxies 0.219530
## defined_populations_simplifiedgenetic_clusters 0.258952
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.440523
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.218963
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.235957
## defined_populations_simplifiedgeographic_boundaries management_units 0.337111
## defined_populations_simplifiedmanagement_units 0.248552
## defined_populations_simplifiedother 0.511511
## defined_populations_simplifiedother_combinations 0.167661
## n_extant_populations 0.001089
## z value
## (Intercept) 2.511
## defined_populations_simplifieddispersal_buffer 1.063
## defined_populations_simplifiedeco_biogeo_proxies 0.484
## defined_populations_simplifiedgenetic_clusters 1.982
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.059
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.244
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.386
## defined_populations_simplifiedgeographic_boundaries management_units 1.083
## defined_populations_simplifiedmanagement_units -0.622
## defined_populations_simplifiedother 0.176
## defined_populations_simplifiedother_combinations 2.620
## n_extant_populations 0.970
## Pr(>|z|)
## (Intercept) 0.0120
## defined_populations_simplifieddispersal_buffer 0.2877
## defined_populations_simplifiedeco_biogeo_proxies 0.6285
## defined_populations_simplifiedgenetic_clusters 0.0475
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.2897
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.8069
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.6995
## defined_populations_simplifiedgeographic_boundaries management_units 0.2786
## defined_populations_simplifiedmanagement_units 0.5340
## defined_populations_simplifiedother 0.8603
## defined_populations_simplifiedother_combinations 0.0088
## n_extant_populations 0.3321
##
## (Intercept) *
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations **
## n_extant_populations
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Then we tested (see plot psupA) if there is a relationship between
number of maintained populations and the PM indicator, overall, and/or
with some methods?
Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 78
## eco_biogeo_proxies
## 30
## genetic_clusters
## 50
## genetic_clusters eco_biogeo_proxies
## 12
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 170
## geographic_boundaries eco_biogeo_proxies
## 41
## geographic_boundaries management_units
## 17
## management_units
## 23
## other
## 9
## other_combinations
## 77
# total n
nrow(data_for_model)
## [1] 548
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
We tested for a relationship between number of populations alone with
indicator 2 in our dataset (i.e. when not controlling for method).
Does number of populations alone affect indicator2 (i.e. not
controlling for method)?:
msupA1 <- glmmTMB(indicator2 ~ n_extant_populations + (1|country_assessment), family = "ordbeta", data= data_for_model)
Summary:
summary(msupA1)
## Family: ordbeta ( logit )
## Formula: indicator2 ~ n_extant_populations + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 663.1 688.9 -325.6 651.1 542
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.3217 0.5672
## Number of obs: 548, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 3.99
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7057076 0.2055940 3.433 0.000598 ***
## n_extant_populations 0.0007805 0.0010848 0.720 0.471812
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
But, there were statistically significant interactions between number
of populations and some of the methods used, on indicator 2.
Does the effect of method on indicator2 depend on number of
maintained pops?
# run model
msupA2 <- glmmTMB(indicator2 ~ defined_populations_simplified + n_extant_populations + defined_populations_simplified*n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation
Summary:
summary(msupA2)
## Family: ordbeta ( logit )
## Formula:
## indicator2 ~ defined_populations_simplified + n_extant_populations +
## defined_populations_simplified * n_extant_populations + (1 |
## country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 657.6 769.5 -302.8 605.6 522
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.3069 0.554
## Number of obs: 548, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.65
##
## Conditional model:
## Estimate
## (Intercept) 0.484586
## defined_populations_simplifieddispersal_buffer 0.245632
## defined_populations_simplifiedeco_biogeo_proxies 0.075024
## defined_populations_simplifiedgenetic_clusters 0.548174
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.677871
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.153598
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.107751
## defined_populations_simplifiedgeographic_boundaries management_units 0.107382
## defined_populations_simplifiedmanagement_units 0.360726
## defined_populations_simplifiedother -1.501731
## defined_populations_simplifiedother_combinations 0.302748
## n_extant_populations 0.004369
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.004515
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -0.001719
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.006833
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations -0.089949
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -0.006480
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -0.006729
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.047126
## defined_populations_simplifiedmanagement_units:n_extant_populations -0.032165
## defined_populations_simplifiedother:n_extant_populations 0.466300
## defined_populations_simplifiedother_combinations:n_extant_populations 0.006554
## Std. Error
## (Intercept) 0.222822
## defined_populations_simplifieddispersal_buffer 0.248954
## defined_populations_simplifiedeco_biogeo_proxies 0.246262
## defined_populations_simplifiedgenetic_clusters 0.384296
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.679634
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.233033
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.257803
## defined_populations_simplifiedgeographic_boundaries management_units 0.428411
## defined_populations_simplifiedmanagement_units 0.334857
## defined_populations_simplifiedother 0.970197
## defined_populations_simplifiedother_combinations 0.192382
## n_extant_populations 0.002203
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.007448
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.003376
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.061796
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.033104
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.002886
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.003199
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.053102
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.015044
## defined_populations_simplifiedother:n_extant_populations 0.286151
## defined_populations_simplifiedother_combinations:n_extant_populations 0.005243
## z value
## (Intercept) 2.175
## defined_populations_simplifieddispersal_buffer 0.987
## defined_populations_simplifiedeco_biogeo_proxies 0.305
## defined_populations_simplifiedgenetic_clusters 1.426
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 2.469
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.659
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.418
## defined_populations_simplifiedgeographic_boundaries management_units 0.251
## defined_populations_simplifiedmanagement_units 1.077
## defined_populations_simplifiedother -1.548
## defined_populations_simplifiedother_combinations 1.574
## n_extant_populations 1.983
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.606
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -0.509
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.111
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations -2.717
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -2.245
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -2.103
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.887
## defined_populations_simplifiedmanagement_units:n_extant_populations -2.138
## defined_populations_simplifiedother:n_extant_populations 1.630
## defined_populations_simplifiedother_combinations:n_extant_populations 1.250
## Pr(>|z|)
## (Intercept) 0.02965
## defined_populations_simplifieddispersal_buffer 0.32381
## defined_populations_simplifiedeco_biogeo_proxies 0.76063
## defined_populations_simplifiedgenetic_clusters 0.15374
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.01356
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.50982
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.67598
## defined_populations_simplifiedgeographic_boundaries management_units 0.80208
## defined_populations_simplifiedmanagement_units 0.28137
## defined_populations_simplifiedother 0.12166
## defined_populations_simplifiedother_combinations 0.11556
## n_extant_populations 0.04737
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.54439
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.61075
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.91196
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.00658
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.02474
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.03544
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.37483
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.03251
## defined_populations_simplifiedother:n_extant_populations 0.10319
## defined_populations_simplifiedother_combinations:n_extant_populations 0.21123
##
## (Intercept) *
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies *
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## n_extant_populations *
## defined_populations_simplifieddispersal_buffer:n_extant_populations
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations
## defined_populations_simplifiedgenetic_clusters:n_extant_populations
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations **
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations *
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations *
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations
## defined_populations_simplifiedmanagement_units:n_extant_populations *
## defined_populations_simplifiedother:n_extant_populations
## defined_populations_simplifiedother_combinations:n_extant_populations
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Because the method used to define a population appears to be
important for these relationships, we conducted an additional analysis
to simplify our analysis to only those species for which a single method
was used to determine population clusters, and repeated the model
presented above (evaluating a possible interaction between method and
number of populations on indicator 2).
First, subset the data to only those taxa where a single method was
used:
ind2_single_methods<-indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) %>% # doesn't make a difference in the test below, but useful for
filter(defined_populations_simplified=="genetic_clusters" |
defined_populations_simplified=="geographic_boundaries" |
defined_populations_simplified=="eco_biogeo_proxies" |
defined_populations_simplified=="management_units" |
defined_populations_simplified=="dispersal_buffer")
# check number of methods
length(unique(ind2_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind2_single_methods$defined_populations_simplified)
##
## dispersal_buffer eco_biogeo_proxies genetic_clusters
## 78 30 50
## geographic_boundaries management_units
## 170 23
# check n total
nrow(ind2_single_methods)
## [1] 351
# re-level to use geographic boundaries as reference category for the analysis
ind2_single_methods$defined_populations_simplified<-relevel(as.factor(ind2_single_methods$defined_populations_simplified),
ref="geographic_boundaries")
Does the effect of “single” method on indicator2 depend on number of
maintained pops?
msupA3<-glmmTMB(indicator2 ~ n_extant_populations + defined_populations_simplified + n_extant_populations*defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = ind2_single_methods)
# summary
summary(msupA3)
## Family: ordbeta ( logit )
## Formula:
## indicator2 ~ n_extant_populations + defined_populations_simplified +
## n_extant_populations * defined_populations_simplified + (1 |
## country_assessment)
## Data: ind2_single_methods
##
## AIC BIC logLik deviance df.resid
## 440.9 494.9 -206.4 412.9 337
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.2456 0.4956
## Number of obs: 351, groups: country_assessment, 8
##
## Dispersion parameter for ordbeta family (): 4.28
##
## Conditional model:
## Estimate
## (Intercept) 0.450263
## n_extant_populations 0.003226
## defined_populations_simplifieddispersal_buffer 0.206297
## defined_populations_simplifiedeco_biogeo_proxies -0.004752
## defined_populations_simplifiedgenetic_clusters 0.756034
## defined_populations_simplifiedmanagement_units 0.416852
## n_extant_populations:defined_populations_simplifieddispersal_buffer 0.005287
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies -0.001057
## n_extant_populations:defined_populations_simplifiedgenetic_clusters -0.006682
## n_extant_populations:defined_populations_simplifiedmanagement_units -0.035749
## Std. Error
## (Intercept) 0.225035
## n_extant_populations 0.002156
## defined_populations_simplifieddispersal_buffer 0.290865
## defined_populations_simplifiedeco_biogeo_proxies 0.250256
## defined_populations_simplifiedgenetic_clusters 0.386440
## defined_populations_simplifiedmanagement_units 0.340805
## n_extant_populations:defined_populations_simplifieddispersal_buffer 0.007506
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies 0.003314
## n_extant_populations:defined_populations_simplifiedgenetic_clusters 0.062699
## n_extant_populations:defined_populations_simplifiedmanagement_units 0.015969
## z value
## (Intercept) 2.001
## n_extant_populations 1.496
## defined_populations_simplifieddispersal_buffer 0.709
## defined_populations_simplifiedeco_biogeo_proxies -0.019
## defined_populations_simplifiedgenetic_clusters 1.956
## defined_populations_simplifiedmanagement_units 1.223
## n_extant_populations:defined_populations_simplifieddispersal_buffer 0.704
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies -0.319
## n_extant_populations:defined_populations_simplifiedgenetic_clusters -0.107
## n_extant_populations:defined_populations_simplifiedmanagement_units -2.239
## Pr(>|z|)
## (Intercept) 0.0454
## n_extant_populations 0.1346
## defined_populations_simplifieddispersal_buffer 0.4782
## defined_populations_simplifiedeco_biogeo_proxies 0.9848
## defined_populations_simplifiedgenetic_clusters 0.0504
## defined_populations_simplifiedmanagement_units 0.2213
## n_extant_populations:defined_populations_simplifieddispersal_buffer 0.4812
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies 0.7497
## n_extant_populations:defined_populations_simplifiedgenetic_clusters 0.9151
## n_extant_populations:defined_populations_simplifiedmanagement_units 0.0252
##
## (Intercept) *
## n_extant_populations
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters .
## defined_populations_simplifiedmanagement_units
## n_extant_populations:defined_populations_simplifieddispersal_buffer
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies
## n_extant_populations:defined_populations_simplifiedgenetic_clusters
## n_extant_populations:defined_populations_simplifiedmanagement_units *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Because we found a relationship between method and number of
populations on indicator PM, and a relationship between species range
and number of populations, we further tested whether the effect of
method on indicator PM is moderated by species range.
First filter data to consider only wide ranging and restricted
categories (ie remove unknown due to small sampling size)
## Remove unknown
data<- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(species_range !="unknown")
# summary of indicator
summary(data$indicator2_mean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.6667 1.0000 0.8264 1.0000 1.0000
# re-level to use geographic boundaries as reference category for the analysis
data$defined_populations_simplified<-relevel(as.factor(data$defined_populations_simplified),
ref="geographic_boundaries")
# make sure species range is a factor
data$species_range<-as.factor(data$species_range)
Run model: Does method still impact indicator2 if we control for
species range?
## + country
m.b3 <- glmmTMB(indicator2_mean ~ defined_populations_simplified + species_range + (1|country_assessment), family = "ordbeta", data = data)
# summary results
summary(m.b3)
## Family: ordbeta ( logit )
## Formula:
## indicator2_mean ~ defined_populations_simplified + species_range +
## (1 | country_assessment)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 604.4 672.0 -286.2 572.4 488
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.3342 0.5781
## Number of obs: 504, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.05
##
## Conditional model:
## Estimate
## (Intercept) 0.60813
## defined_populations_simplifieddispersal_buffer 0.08689
## defined_populations_simplifiedeco_biogeo_proxies -0.10759
## defined_populations_simplifiedgenetic_clusters 0.24518
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.17398
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.17137
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.29427
## defined_populations_simplifiedgeographic_boundaries management_units 0.25307
## defined_populations_simplifiedmanagement_units -0.40456
## defined_populations_simplifiedother -0.11990
## defined_populations_simplifiedother_combinations 0.25946
## species_rangewide ranging 0.37618
## Std. Error
## (Intercept) 0.23025
## defined_populations_simplifieddispersal_buffer 0.25645
## defined_populations_simplifiedeco_biogeo_proxies 0.24302
## defined_populations_simplifiedgenetic_clusters 0.26616
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.52520
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.22048
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.25061
## defined_populations_simplifiedgeographic_boundaries management_units 0.33721
## defined_populations_simplifiedmanagement_units 0.29770
## defined_populations_simplifiedother 0.51409
## defined_populations_simplifiedother_combinations 0.17694
## species_rangewide ranging 0.11794
## z value
## (Intercept) 2.641
## defined_populations_simplifieddispersal_buffer 0.339
## defined_populations_simplifiedeco_biogeo_proxies -0.443
## defined_populations_simplifiedgenetic_clusters 0.921
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.331
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.777
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -1.174
## defined_populations_simplifiedgeographic_boundaries management_units 0.750
## defined_populations_simplifiedmanagement_units -1.359
## defined_populations_simplifiedother -0.233
## defined_populations_simplifiedother_combinations 1.466
## species_rangewide ranging 3.190
## Pr(>|z|)
## (Intercept) 0.00826
## defined_populations_simplifieddispersal_buffer 0.73475
## defined_populations_simplifiedeco_biogeo_proxies 0.65796
## defined_populations_simplifiedgenetic_clusters 0.35697
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.74044
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.43700
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.24032
## defined_populations_simplifiedgeographic_boundaries management_units 0.45297
## defined_populations_simplifiedmanagement_units 0.17415
## defined_populations_simplifiedother 0.81559
## defined_populations_simplifiedother_combinations 0.14255
## species_rangewide ranging 0.00142
##
## (Intercept) **
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## species_rangewide ranging **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Similarly to the effect of number of populations on indicator 2, we
further tested whether there was an interaction between method and
species range, i.e. to determine whether species range was only
associated with indicator 2 for some methods.
## run model
m.b4 <- glmmTMB(indicator2_mean ~ defined_populations_simplified + species_range + defined_populations_simplified*species_range + (1|country_assessment), family = "ordbeta", data = data)
# summary results
summary(m.b4)
## Family: ordbeta ( logit )
## Formula:
## indicator2_mean ~ defined_populations_simplified + species_range +
## defined_populations_simplified * species_range + (1 | country_assessment)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 609.9 719.7 -279.0 557.9 478
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.3399 0.583
## Number of obs: 504, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.24
##
## Conditional model:
## Estimate
## (Intercept) 0.56029
## defined_populations_simplifieddispersal_buffer 0.22080
## defined_populations_simplifiedeco_biogeo_proxies -0.17269
## defined_populations_simplifiedgenetic_clusters 0.15661
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -0.28542
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.21528
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.04464
## defined_populations_simplifiedgeographic_boundaries management_units 0.06902
## defined_populations_simplifiedmanagement_units 0.02335
## defined_populations_simplifiedother -0.38736
## defined_populations_simplifiedother_combinations 0.27749
## species_rangewide ranging 0.51258
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging -0.32986
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 0.01570
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 0.06969
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 19.50025
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 0.05761
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging -0.73680
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 0.66821
## defined_populations_simplifiedmanagement_units:species_rangewide ranging -0.97281
## defined_populations_simplifiedother:species_rangewide ranging 18.31094
## defined_populations_simplifiedother_combinations:species_rangewide ranging -0.12222
## Std. Error
## (Intercept) 0.23411
## defined_populations_simplifieddispersal_buffer 0.28808
## defined_populations_simplifiedeco_biogeo_proxies 0.31444
## defined_populations_simplifiedgenetic_clusters 0.40422
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.54464
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.26468
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.29365
## defined_populations_simplifiedgeographic_boundaries management_units 0.37642
## defined_populations_simplifiedmanagement_units 0.40119
## defined_populations_simplifiedother 0.52825
## defined_populations_simplifiedother_combinations 0.23063
## species_rangewide ranging 0.22564
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging 0.34952
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 0.48091
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 0.53952
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 9115.05701
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 0.46190
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 0.45403
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 0.85982
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 0.59336
## defined_populations_simplifiedother:species_rangewide ranging 6189.79540
## defined_populations_simplifiedother_combinations:species_rangewide ranging 0.37479
## z value
## (Intercept) 2.393
## defined_populations_simplifieddispersal_buffer 0.766
## defined_populations_simplifiedeco_biogeo_proxies -0.549
## defined_populations_simplifiedgenetic_clusters 0.387
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -0.524
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.813
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.152
## defined_populations_simplifiedgeographic_boundaries management_units 0.183
## defined_populations_simplifiedmanagement_units 0.058
## defined_populations_simplifiedother -0.733
## defined_populations_simplifiedother_combinations 1.203
## species_rangewide ranging 2.272
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging -0.944
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 0.033
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 0.129
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 0.002
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 0.125
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging -1.623
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 0.777
## defined_populations_simplifiedmanagement_units:species_rangewide ranging -1.640
## defined_populations_simplifiedother:species_rangewide ranging 0.003
## defined_populations_simplifiedother_combinations:species_rangewide ranging -0.326
## Pr(>|z|)
## (Intercept) 0.0167
## defined_populations_simplifieddispersal_buffer 0.4434
## defined_populations_simplifiedeco_biogeo_proxies 0.5829
## defined_populations_simplifiedgenetic_clusters 0.6984
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.6002
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.4160
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.8792
## defined_populations_simplifiedgeographic_boundaries management_units 0.8545
## defined_populations_simplifiedmanagement_units 0.9536
## defined_populations_simplifiedother 0.4634
## defined_populations_simplifiedother_combinations 0.2289
## species_rangewide ranging 0.0231
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging 0.3453
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 0.9740
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 0.8972
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 0.9983
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 0.9007
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 0.1046
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 0.4371
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 0.1011
## defined_populations_simplifiedother:species_rangewide ranging 0.9976
## defined_populations_simplifiedother_combinations:species_rangewide ranging 0.7443
##
## (Intercept) *
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## species_rangewide ranging *
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging
## defined_populations_simplifiedmanagement_units:species_rangewide ranging
## defined_populations_simplifiedother:species_rangewide ranging
## defined_populations_simplifiedother_combinations:species_rangewide ranging
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(c) Proportion of populations with Ne>500 (indicator1)
Our analysis of Ne indicator followed a parallel structure to our
analysis of PM indicator.
Plot Ne indicator by method to define pops.
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot
pc<- df %>%
ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
pc
Scatter plot of indicator1 vs extant pops
psupB<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator1, color=defined_populations_nicenames)) +
geom_point() +
theme_light() +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
theme(legend.position = "none") +
ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
psupB
## Coloring by range
psupB.1<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator1, color=species_range)) +
geom_point() +
theme_light() +
theme(legend.position = "none") +
ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
psupB.1
First we tested whether method used was associated with variation in
indicator (figure c)
Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 138
## eco_biogeo_proxies
## 17
## genetic_clusters
## 57
## genetic_clusters eco_biogeo_proxies
## 8
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 159
## geographic_boundaries eco_biogeo_proxies
## 56
## geographic_boundaries management_units
## 20
## management_units
## 13
## other
## 6
## other_combinations
## 68
# total n
nrow(data_for_model)
## [1] 583
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
Run model asking: Does Ne indicator vary with method used?
Controlling for variation in indicator among countries:
m.c1<-glmmTMB(indicator1 ~ defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = data_for_model)
See results:
summary(m.c1)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ defined_populations_simplified + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 1076.9 1142.4 -523.4 1046.9 568
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.09219 0.3036
## Number of obs: 583, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 3.94
##
## Conditional model:
## Estimate
## (Intercept) -0.87457
## defined_populations_simplifieddispersal_buffer 0.32313
## defined_populations_simplifiedeco_biogeo_proxies -0.13817
## defined_populations_simplifiedgenetic_clusters 0.60539
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.05417
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.53362
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.10274
## defined_populations_simplifiedgeographic_boundaries management_units 0.62579
## defined_populations_simplifiedmanagement_units -0.05004
## defined_populations_simplifiedother 1.03617
## defined_populations_simplifiedother_combinations 0.36633
## Std. Error
## (Intercept) 0.18019
## defined_populations_simplifieddispersal_buffer 0.30131
## defined_populations_simplifiedeco_biogeo_proxies 0.33283
## defined_populations_simplifiedgenetic_clusters 0.24218
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.43950
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.24566
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.33923
## defined_populations_simplifiedgeographic_boundaries management_units 0.33188
## defined_populations_simplifiedmanagement_units 0.41560
## defined_populations_simplifiedother 0.62076
## defined_populations_simplifiedother_combinations 0.20284
## z value
## (Intercept) -4.854
## defined_populations_simplifieddispersal_buffer 1.072
## defined_populations_simplifiedeco_biogeo_proxies -0.415
## defined_populations_simplifiedgenetic_clusters 2.500
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 2.399
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 2.172
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.303
## defined_populations_simplifiedgeographic_boundaries management_units 1.886
## defined_populations_simplifiedmanagement_units -0.120
## defined_populations_simplifiedother 1.669
## defined_populations_simplifiedother_combinations 1.806
## Pr(>|z|)
## (Intercept) 1.21e-06
## defined_populations_simplifieddispersal_buffer 0.2835
## defined_populations_simplifiedeco_biogeo_proxies 0.6780
## defined_populations_simplifiedgenetic_clusters 0.0124
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.0165
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.0298
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.7620
## defined_populations_simplifiedgeographic_boundaries management_units 0.0593
## defined_populations_simplifiedmanagement_units 0.9042
## defined_populations_simplifiedother 0.0951
## defined_populations_simplifiedother_combinations 0.0709
##
## (Intercept) ***
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies *
## defined_populations_simplifiedgenetic_clusters geographic_boundaries *
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units .
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother .
## defined_populations_simplifiedother_combinations .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
we next investigated whether the relationships between methods and
the indicator were moderated by the role of number of populations and
species range. Ie:
Does method still influence indicator1 if we control for number of
populations?
m.c2 <- glmmTMB(indicator1 ~ defined_populations_simplified + n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
summary(m.c2)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ defined_populations_simplified + n_extant_populations +
## (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 1070.7 1140.6 -519.4 1038.7 567
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.06352 0.252
## Number of obs: 583, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.34
##
## Conditional model:
## Estimate
## (Intercept) -0.749921
## defined_populations_simplifieddispersal_buffer 0.204457
## defined_populations_simplifiedeco_biogeo_proxies -0.115881
## defined_populations_simplifiedgenetic_clusters 0.529194
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.002282
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.481752
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.132066
## defined_populations_simplifiedgeographic_boundaries management_units 0.613413
## defined_populations_simplifiedmanagement_units -0.069282
## defined_populations_simplifiedother 0.941856
## defined_populations_simplifiedother_combinations 0.365829
## n_extant_populations -0.004817
## Std. Error
## (Intercept) 0.171514
## defined_populations_simplifieddispersal_buffer 0.282390
## defined_populations_simplifiedeco_biogeo_proxies 0.324865
## defined_populations_simplifiedgenetic_clusters 0.237933
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.427234
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.241202
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.325036
## defined_populations_simplifiedgeographic_boundaries management_units 0.324258
## defined_populations_simplifiedmanagement_units 0.404008
## defined_populations_simplifiedother 0.613463
## defined_populations_simplifiedother_combinations 0.195921
## n_extant_populations 0.001793
## z value
## (Intercept) -4.372
## defined_populations_simplifieddispersal_buffer 0.724
## defined_populations_simplifiedeco_biogeo_proxies -0.357
## defined_populations_simplifiedgenetic_clusters 2.224
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 2.346
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 1.997
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.406
## defined_populations_simplifiedgeographic_boundaries management_units 1.892
## defined_populations_simplifiedmanagement_units -0.171
## defined_populations_simplifiedother 1.535
## defined_populations_simplifiedother_combinations 1.867
## n_extant_populations -2.686
## Pr(>|z|)
## (Intercept) 1.23e-05
## defined_populations_simplifieddispersal_buffer 0.46905
## defined_populations_simplifiedeco_biogeo_proxies 0.72131
## defined_populations_simplifiedgenetic_clusters 0.02614
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.01898
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.04579
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.68451
## defined_populations_simplifiedgeographic_boundaries management_units 0.05853
## defined_populations_simplifiedmanagement_units 0.86384
## defined_populations_simplifiedother 0.12471
## defined_populations_simplifiedother_combinations 0.06187
## n_extant_populations 0.00722
##
## (Intercept) ***
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies *
## defined_populations_simplifiedgenetic_clusters geographic_boundaries *
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units .
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations .
## n_extant_populations **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We then tested if there a relationship between number of maintained
populations and indicator1, overall, and/or with some methods? (model
associated to plot psupB)
Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 138
## eco_biogeo_proxies
## 17
## genetic_clusters
## 57
## genetic_clusters eco_biogeo_proxies
## 8
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 159
## geographic_boundaries eco_biogeo_proxies
## 56
## geographic_boundaries management_units
## 20
## management_units
## 13
## other
## 6
## other_combinations
## 68
# total n
nrow(data_for_model)
## [1] 583
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
Does the number of maintained pops alone affect the Ne indicator?
(i.e. not controlling for method)
msupB1<-glmmTMB(indicator1 ~ n_extant_populations + (1|country_assessment), family = "ordbeta", data= data_for_model)
Summary:
summary(msupB1)
## Family: ordbeta ( logit )
## Formula: indicator1 ~ n_extant_populations + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 1068.4 1094.6 -528.2 1056.4 577
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.05776 0.2403
## Number of obs: 583, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.12
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.508216 0.121453 -4.184 2.86e-05 ***
## n_extant_populations -0.005068 0.001785 -2.840 0.00452 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Does the effect of method depend on the number of populations? Or put
another way, does the importance of number of populations also depend on
method?
# run model
msupB2 <- glmmTMB(indicator1 ~ defined_populations_simplified + n_extant_populations + defined_populations_simplified*n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation
Summary:
summary(msupB2)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ defined_populations_simplified + n_extant_populations +
## defined_populations_simplified * n_extant_populations + (1 |
## country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 1069.9 1183.5 -509.0 1017.9 557
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.08214 0.2866
## Number of obs: 583, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.59
##
## Conditional model:
## Estimate
## (Intercept) -0.844485
## defined_populations_simplifieddispersal_buffer 0.335391
## defined_populations_simplifiedeco_biogeo_proxies 0.174199
## defined_populations_simplifiedgenetic_clusters 1.218872
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.035842
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.679535
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.666074
## defined_populations_simplifiedgeographic_boundaries management_units 0.976410
## defined_populations_simplifiedmanagement_units 0.393074
## defined_populations_simplifiedother -0.830917
## defined_populations_simplifiedother_combinations 0.451677
## n_extant_populations -0.001790
## defined_populations_simplifieddispersal_buffer:n_extant_populations -0.003756
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -0.016485
## defined_populations_simplifiedgenetic_clusters:n_extant_populations -0.163071
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.010626
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -0.015564
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -0.112722
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations -0.029777
## defined_populations_simplifiedmanagement_units:n_extant_populations -0.055134
## defined_populations_simplifiedother:n_extant_populations 0.723094
## defined_populations_simplifiedother_combinations:n_extant_populations -0.003590
## Std. Error
## (Intercept) 0.184583
## defined_populations_simplifieddispersal_buffer 0.281788
## defined_populations_simplifiedeco_biogeo_proxies 0.415555
## defined_populations_simplifiedgenetic_clusters 0.370473
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.629121
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.305613
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.493042
## defined_populations_simplifiedgeographic_boundaries management_units 0.413475
## defined_populations_simplifiedmanagement_units 0.608449
## defined_populations_simplifiedother 1.566674
## defined_populations_simplifiedother_combinations 0.206091
## n_extant_populations 0.002594
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.004392
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.014623
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.074140
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.084344
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.022508
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.058564
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.028876
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.067769
## defined_populations_simplifiedother:n_extant_populations 0.784606
## defined_populations_simplifiedother_combinations:n_extant_populations 0.003941
## z value
## (Intercept) -4.575
## defined_populations_simplifieddispersal_buffer 1.190
## defined_populations_simplifiedeco_biogeo_proxies 0.419
## defined_populations_simplifiedgenetic_clusters 3.290
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.646
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 2.224
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 1.351
## defined_populations_simplifiedgeographic_boundaries management_units 2.361
## defined_populations_simplifiedmanagement_units 0.646
## defined_populations_simplifiedother -0.530
## defined_populations_simplifiedother_combinations 2.192
## n_extant_populations -0.690
## defined_populations_simplifieddispersal_buffer:n_extant_populations -0.855
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -1.127
## defined_populations_simplifiedgenetic_clusters:n_extant_populations -2.199
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.126
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -0.691
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -1.925
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations -1.031
## defined_populations_simplifiedmanagement_units:n_extant_populations -0.814
## defined_populations_simplifiedother:n_extant_populations 0.922
## defined_populations_simplifiedother_combinations:n_extant_populations -0.911
## Pr(>|z|)
## (Intercept) 4.76e-06
## defined_populations_simplifieddispersal_buffer 0.2340
## defined_populations_simplifiedeco_biogeo_proxies 0.6751
## defined_populations_simplifiedgenetic_clusters 0.0010
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.0997
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.0262
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.1767
## defined_populations_simplifiedgeographic_boundaries management_units 0.0182
## defined_populations_simplifiedmanagement_units 0.5183
## defined_populations_simplifiedother 0.5959
## defined_populations_simplifiedother_combinations 0.0284
## n_extant_populations 0.4901
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.3924
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.2596
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.0278
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.8997
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.4893
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.0543
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.3024
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.4159
## defined_populations_simplifiedother:n_extant_populations 0.3567
## defined_populations_simplifiedother_combinations:n_extant_populations 0.3624
##
## (Intercept) ***
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters **
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies .
## defined_populations_simplifiedgenetic_clusters geographic_boundaries *
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units *
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations *
## n_extant_populations
## defined_populations_simplifieddispersal_buffer:n_extant_populations
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations
## defined_populations_simplifiedgenetic_clusters:n_extant_populations *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations .
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations
## defined_populations_simplifiedmanagement_units:n_extant_populations
## defined_populations_simplifiedother:n_extant_populations
## defined_populations_simplifiedother_combinations:n_extant_populations
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Because “what’s a population and how do you define them?” is such an
important question, we can also test the effect of methods alone. First,
subset the data to only those taxa where a single method was used:
ind1_single_methods<-indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) %>% # doesn't make a difference in the test below, but useful for
filter(defined_populations_simplified=="genetic_clusters" |
defined_populations_simplified=="geographic_boundaries" |
defined_populations_simplified=="eco_biogeo_proxies" |
defined_populations_simplified=="management_units" |
defined_populations_simplified=="dispersal_buffer")
# check number of methods
length(unique(ind1_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind1_single_methods$defined_populations_simplified)
##
## dispersal_buffer eco_biogeo_proxies genetic_clusters
## 138 17 57
## geographic_boundaries management_units
## 159 13
# check n total
nrow(ind1_single_methods)
## [1] 384
# re-level to use geographic boundaries as reference category for the analysis
ind1_single_methods$defined_populations_simplified<-relevel(as.factor(ind1_single_methods$defined_populations_simplified),
ref="geographic_boundaries")
Run model:
# run model
msupB3 <- glmmTMB(indicator1 ~ n_extant_populations + defined_populations_simplified + n_extant_populations*defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = ind1_single_methods)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation
Summary:
summary(msupB3)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ n_extant_populations + defined_populations_simplified +
## n_extant_populations * defined_populations_simplified + (1 |
## country_assessment)
## Data: ind1_single_methods
##
## AIC BIC logLik deviance df.resid
## 693.4 748.7 -332.7 665.4 370
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.06262 0.2502
## Number of obs: 384, groups: country_assessment, 8
##
## Dispersion parameter for ordbeta family (): 3.77
##
## Conditional model:
## Estimate
## (Intercept) -0.8043666
## n_extant_populations -0.0004889
## defined_populations_simplifieddispersal_buffer 0.1986189
## defined_populations_simplifiedeco_biogeo_proxies 0.1772549
## defined_populations_simplifiedgenetic_clusters 1.1361411
## defined_populations_simplifiedmanagement_units 0.3503307
## n_extant_populations:defined_populations_simplifieddispersal_buffer -0.0035132
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies -0.0148845
## n_extant_populations:defined_populations_simplifiedgenetic_clusters -0.1510877
## n_extant_populations:defined_populations_simplifiedmanagement_units -0.0499400
## Std. Error
## (Intercept) 0.2065387
## n_extant_populations 0.0026816
## defined_populations_simplifieddispersal_buffer 0.3988868
## defined_populations_simplifiedeco_biogeo_proxies 0.4355460
## defined_populations_simplifiedgenetic_clusters 0.3855972
## defined_populations_simplifiedmanagement_units 0.6110674
## n_extant_populations:defined_populations_simplifieddispersal_buffer 0.0043956
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies 0.0147960
## n_extant_populations:defined_populations_simplifiedgenetic_clusters 0.0782808
## n_extant_populations:defined_populations_simplifiedmanagement_units 0.0663490
## z value
## (Intercept) -3.895
## n_extant_populations -0.182
## defined_populations_simplifieddispersal_buffer 0.498
## defined_populations_simplifiedeco_biogeo_proxies 0.407
## defined_populations_simplifiedgenetic_clusters 2.946
## defined_populations_simplifiedmanagement_units 0.573
## n_extant_populations:defined_populations_simplifieddispersal_buffer -0.799
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies -1.006
## n_extant_populations:defined_populations_simplifiedgenetic_clusters -1.930
## n_extant_populations:defined_populations_simplifiedmanagement_units -0.753
## Pr(>|z|)
## (Intercept) 9.84e-05
## n_extant_populations 0.85533
## defined_populations_simplifieddispersal_buffer 0.61853
## defined_populations_simplifiedeco_biogeo_proxies 0.68403
## defined_populations_simplifiedgenetic_clusters 0.00321
## defined_populations_simplifiedmanagement_units 0.56644
## n_extant_populations:defined_populations_simplifieddispersal_buffer 0.42414
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies 0.31443
## n_extant_populations:defined_populations_simplifiedgenetic_clusters 0.05360
## n_extant_populations:defined_populations_simplifiedmanagement_units 0.45164
##
## (Intercept) ***
## n_extant_populations
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters **
## defined_populations_simplifiedmanagement_units
## n_extant_populations:defined_populations_simplifieddispersal_buffer
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies
## n_extant_populations:defined_populations_simplifiedgenetic_clusters .
## n_extant_populations:defined_populations_simplifiedmanagement_units
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Finally, we tested for associations between range type on Ne>500
indicator.
First filter data to consider only wide ranging and restricted
categories (ie remove unknown due to small sampling size)
## Remove unknown
data<- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(species_range !="unknown")
# summary of indicator
summary(data$indicator1_mean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2698 0.5000 1.0000
# re-level to use geographic boundaries as reference category for the analysis
data$defined_populations_simplified<-relevel(as.factor(data$defined_populations_simplified),
ref="geographic_boundaries")
# make sure specis range is a factor
data$species_range<-as.factor(data$species_range)
Is there still an effect of method on indicator1 if we control for
species range?
data_full<- indicators_full %>% # notice this uses indicators_full instead of indicators_average_one
filter(!is.na(indicator1)) %>% # notice this uses indicator1 instead of _mean
filter(species_range !="unknown")
# summary of indicator
summary(data_full$indicator1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2675 0.5000 1.0000
# re-level to use geographic boundaries as reference category for the analysis
data$defined_populations_simplified<-relevel(as.factor(data$defined_populations_simplified),
ref="geographic_boundaries")
# make sure specis range is a factor
data$species_range<-as.factor(data$species_range)
# run model
m.c3_full <- glmmTMB(indicator1 ~ defined_populations_simplified + species_range + (1|country_assessment), family = "ordbeta", data = data)
# summary results
summary(m.c3_full)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ defined_populations_simplified + species_range +
## (1 | country_assessment)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 992.6 1061.4 -480.3 960.6 528
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.1029 0.3207
## Number of obs: 544, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 3.76
##
## Conditional model:
## Estimate
## (Intercept) -1.0968
## defined_populations_simplifieddispersal_buffer 0.1197
## defined_populations_simplifiedeco_biogeo_proxies -0.3347
## defined_populations_simplifiedgenetic_clusters 0.7089
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.8151
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.3957
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.1833
## defined_populations_simplifiedgeographic_boundaries management_units 0.4764
## defined_populations_simplifiedmanagement_units -0.1179
## defined_populations_simplifiedother 1.0387
## defined_populations_simplifiedother_combinations 0.2185
## species_rangewide ranging 0.5671
## Std. Error
## (Intercept) 0.1968
## defined_populations_simplifieddispersal_buffer 0.2878
## defined_populations_simplifiedeco_biogeo_proxies 0.3808
## defined_populations_simplifiedgenetic_clusters 0.2609
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.4551
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.2536
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.3635
## defined_populations_simplifiedgeographic_boundaries management_units 0.3413
## defined_populations_simplifiedmanagement_units 0.4359
## defined_populations_simplifiedother 0.6180
## defined_populations_simplifiedother_combinations 0.2119
## species_rangewide ranging 0.1391
## z value
## (Intercept) -5.574
## defined_populations_simplifieddispersal_buffer 0.416
## defined_populations_simplifiedeco_biogeo_proxies -0.879
## defined_populations_simplifiedgenetic_clusters 2.717
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.791
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 1.560
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.504
## defined_populations_simplifiedgeographic_boundaries management_units 1.396
## defined_populations_simplifiedmanagement_units -0.270
## defined_populations_simplifiedother 1.681
## defined_populations_simplifiedother_combinations 1.031
## species_rangewide ranging 4.077
## Pr(>|z|)
## (Intercept) 2.49e-08
## defined_populations_simplifieddispersal_buffer 0.67755
## defined_populations_simplifiedeco_biogeo_proxies 0.37937
## defined_populations_simplifiedgenetic_clusters 0.00659
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.07326
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.11878
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.61405
## defined_populations_simplifiedgeographic_boundaries management_units 0.16283
## defined_populations_simplifiedmanagement_units 0.78682
## defined_populations_simplifiedother 0.09283
## defined_populations_simplifiedother_combinations 0.30235
## species_rangewide ranging 4.57e-05
##
## (Intercept) ***
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters **
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies .
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother .
## defined_populations_simplifiedother_combinations
## species_rangewide ranging ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Finally, we tested interactions between method and species range, to
determine whether the effect of species range only applies when some
methods are used.
Is the effect of method on Ne indicator moderated by species
range?
data_full<- indicators_full %>% # notice this uses indicators_full instead of indicators_average_one
filter(!is.na(indicator1)) %>% # notice this uses indicator1 instead of _mean
filter(species_range !="unknown")
## run model
m.c4 <- glmmTMB(indicator1 ~ defined_populations_simplified + species_range + defined_populations_simplified*species_range + (1|country_assessment), family = "ordbeta", data = data_full)
# summary results
summary(m.c4)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ defined_populations_simplified + species_range +
## defined_populations_simplified * species_range + (1 | country_assessment)
## Data: data_full
##
## AIC BIC logLik deviance df.resid
## 1016.1 1128.6 -482.0 964.1 534
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.1088 0.3299
## Number of obs: 560, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.05
##
## Conditional model:
## Estimate
## (Intercept) -0.7759
## defined_populations_simplifiedeco_biogeo_proxies -0.1573
## defined_populations_simplifiedgenetic_clusters -0.2305
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -17.2560
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.3871
## defined_populations_simplifiedgeographic_boundaries -0.3148
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.8122
## defined_populations_simplifiedgeographic_boundaries management_units 0.4190
## defined_populations_simplifiedmanagement_units -20.2405
## defined_populations_simplifiedother 0.3363
## defined_populations_simplifiedother_combinations 0.3922
## species_rangewide_ranging 0.3299
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide_ranging -0.6963
## defined_populations_simplifiedgenetic_clusters:species_rangewide_ranging 0.9480
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide_ranging 18.1390
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide_ranging 0.9368
## defined_populations_simplifiedgeographic_boundaries:species_rangewide_ranging 0.2220
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide_ranging 0.9206
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide_ranging -0.2184
## defined_populations_simplifiedmanagement_units:species_rangewide_ranging 20.2239
## defined_populations_simplifiedother:species_rangewide_ranging 23.0542
## defined_populations_simplifiedother_combinations:species_rangewide_ranging -0.5542
## Std. Error
## (Intercept) 0.3266
## defined_populations_simplifiedeco_biogeo_proxies 0.5381
## defined_populations_simplifiedgenetic_clusters 0.4615
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 4341.1110
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.5074
## defined_populations_simplifiedgeographic_boundaries 0.3672
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.5578
## defined_populations_simplifiedgeographic_boundaries management_units 0.5662
## defined_populations_simplifiedmanagement_units 11060.9529
## defined_populations_simplifiedother 0.7425
## defined_populations_simplifiedother_combinations 0.3867
## species_rangewide_ranging 0.2724
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide_ranging 0.6907
## defined_populations_simplifiedgenetic_clusters:species_rangewide_ranging 0.5017
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide_ranging 4341.1110
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide_ranging 0.5404
## defined_populations_simplifiedgeographic_boundaries:species_rangewide_ranging 0.3655
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide_ranging 0.7128
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide_ranging 0.6987
## defined_populations_simplifiedmanagement_units:species_rangewide_ranging 11060.9529
## defined_populations_simplifiedother:species_rangewide_ranging 46288.4039
## defined_populations_simplifiedother_combinations:species_rangewide_ranging 0.4065
## z value
## (Intercept) -2.376
## defined_populations_simplifiedeco_biogeo_proxies -0.292
## defined_populations_simplifiedgenetic_clusters -0.499
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -0.004
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.763
## defined_populations_simplifiedgeographic_boundaries -0.857
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -1.456
## defined_populations_simplifiedgeographic_boundaries management_units 0.740
## defined_populations_simplifiedmanagement_units -0.002
## defined_populations_simplifiedother 0.453
## defined_populations_simplifiedother_combinations 1.014
## species_rangewide_ranging 1.211
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide_ranging -1.008
## defined_populations_simplifiedgenetic_clusters:species_rangewide_ranging 1.890
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide_ranging 0.004
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide_ranging 1.734
## defined_populations_simplifiedgeographic_boundaries:species_rangewide_ranging 0.608
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide_ranging 1.292
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide_ranging -0.313
## defined_populations_simplifiedmanagement_units:species_rangewide_ranging 0.002
## defined_populations_simplifiedother:species_rangewide_ranging 0.000
## defined_populations_simplifiedother_combinations:species_rangewide_ranging -1.363
## Pr(>|z|)
## (Intercept) 0.0175
## defined_populations_simplifiedeco_biogeo_proxies 0.7700
## defined_populations_simplifiedgenetic_clusters 0.6176
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.9968
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.4455
## defined_populations_simplifiedgeographic_boundaries 0.3912
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.1454
## defined_populations_simplifiedgeographic_boundaries management_units 0.4593
## defined_populations_simplifiedmanagement_units 0.9985
## defined_populations_simplifiedother 0.6506
## defined_populations_simplifiedother_combinations 0.3105
## species_rangewide_ranging 0.2258
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide_ranging 0.3134
## defined_populations_simplifiedgenetic_clusters:species_rangewide_ranging 0.0588
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide_ranging 0.9967
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide_ranging 0.0830
## defined_populations_simplifiedgeographic_boundaries:species_rangewide_ranging 0.5435
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide_ranging 0.1965
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide_ranging 0.7546
## defined_populations_simplifiedmanagement_units:species_rangewide_ranging 0.9985
## defined_populations_simplifiedother:species_rangewide_ranging 0.9996
## defined_populations_simplifiedother_combinations:species_rangewide_ranging 0.1728
##
## (Intercept) *
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## species_rangewide_ranging
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide_ranging
## defined_populations_simplifiedgenetic_clusters:species_rangewide_ranging .
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide_ranging
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide_ranging .
## defined_populations_simplifiedgeographic_boundaries:species_rangewide_ranging
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide_ranging
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide_ranging
## defined_populations_simplifiedmanagement_units:species_rangewide_ranging
## defined_populations_simplifiedother:species_rangewide_ranging
## defined_populations_simplifiedother_combinations:species_rangewide_ranging
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Note the previous model has high Str.Errors, so this model does not
converge.
Main Figure 3 (representing the models above): Single plot 5 panels.
Boxplots plots for the effect of method on: number of populations,
proportion of maintained populations (indicator 2) and Proportion of
populations with Ne>500 (indicator 1), AND Violin plots for the
distribution of the indicator values by range type.
Top a,b,c panel boxplots:
##### plot for Proportion of maintained populations (indicator 2) only with n in axis labels
# sample size
sample_size <- indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
pb.1<- df %>%
filter(n_extant_populations<500) %>%
ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev,
labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
theme(text = element_text(size = 13))
##### plot for Proportion populations Ne>500 (indicator 1) only with n in axis labels
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
## plot
pc.1<- df %>%
ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev,
labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
theme(text = element_text(size = 13))
## Plot 3 panels
p_top<-plot_grid(pa, pb.1, pc.1, ncol=3, rel_widths = c(1.8,1,1), align = "h", labels=c("A)", "B)", "C)"))
p_top
Bottom d, e violin plots. Indicators by of range type coloring points
to show genetic clusters
For PM indicator:
# add variable stating if genetic methods are used
indicators_averaged_one<- indicators_averaged_one %>%
mutate(genetic_to_define_pops = ifelse(grepl("genetic", defined_populations_simplified), 'genetic method', 'non genetic'))
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
pd<-indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator2_mean)) +
geom_violin(width=1, linewidth = 0, fill="grey70") +
xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
coord_flip() +
new_scale_color() + # to color points without confuisng ggplot
geom_jitter(size=1.2, width = 0.1, aes(color = genetic_to_define_pops)) +
scale_color_manual(values=c("red", "black")) +
labs(color=NULL) + # hide legend title
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
For Ne indicator:
# add variable stating if genetic methods are used
indicators_averaged_one<- indicators_averaged_one %>%
mutate(genetic_to_define_pops = ifelse(grepl("genetic", defined_populations_simplified), 'genetic method', 'non genetic'))
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
pe <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator1_mean)) +
geom_violin(width=1, linewidth = 0, fill="grey70") +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
coord_flip() +
new_scale_color() + # to color the points without confusing ggplot
geom_jitter(size=1.2, width = 0.1, aes(color = genetic_to_define_pops)) +
scale_color_manual(values=c("red", "black")) +
theme_light() +
labs(color="Method to \ndefine populations") + # nicer legend title
theme(panel.border = element_blank(), legend.position="right", text= element_text(size=20))
## Joining, by = "species_range"
Two panel figure:
p_bot<-plot_grid(pd + theme(legend.position = "non2"), # legend can be shown only below both plots
pe,
ncol = 2,
rel_widths = c(1,1.3), align = "h", labels=c("D)", "E)"))
Both top and bottom together:
plot_grid(p_top, p_bot, ncol = 1)
ggsave("Fig3.pdf", width = 52, height = 30 , units = "cm")
Indicatros by threat status (IUCN Red List)
All the following plots and analyses consider the average of
multiassessed species (variable _mean
), so that they are
shown only once.
(a) Ne > 500 indicator and red list status
Plot indicator 1 by global IUCN in the entire dataset:
## Global IUCN
# Capitalize abbreviations
indicators_averaged_one$global_IUCN<-as.factor(indicators_averaged_one$global_IUCN)
levels(indicators_averaged_one$global_IUCN)
## [1] "cr" "dd" "en" "lc" "not_assessed"
## [6] "nt" "unknown" "vu"
indicators_averaged_one$global_IUCN<-factor(indicators_averaged_one$global_IUCN,
levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed","unknown"))
levels(indicators_averaged_one$global_IUCN) <- c("CR", "EN", "VU", "NT", "LC", "DD", "NE", "unknown")
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(global_IUCN)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("CR", unique(df$myaxis), value = TRUE),
grep("EN", unique(df$myaxis), value = TRUE),
grep("VU", unique(df$myaxis), value = TRUE),
grep("NT", unique(df$myaxis), value = TRUE),
grep("LC", unique(df$myaxis), value = TRUE),
grep("DD", unique(df$myaxis), value = TRUE),
grep("NE", unique(df$myaxis), value = TRUE),
grep("unknown", unique(df$myaxis), value = TRUE)))
# plot
p1<-df %>%
ggplot(aes(x=myaxis, y=indicator1_mean , fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(df$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=15))
p1
Summary table:
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
kable(x, digits=2)
CR |
46 |
0.11 |
0.00 |
84.78 |
86.96 |
93.48 |
6.52 |
6.52 |
EN |
48 |
0.25 |
0.00 |
66.67 |
70.83 |
81.25 |
18.75 |
18.75 |
VU |
66 |
0.32 |
0.00 |
56.06 |
59.09 |
78.79 |
22.73 |
21.21 |
NT |
51 |
0.24 |
0.00 |
54.90 |
72.55 |
84.31 |
15.69 |
15.69 |
LC |
186 |
0.37 |
0.05 |
47.31 |
55.91 |
72.04 |
29.03 |
27.96 |
DD |
10 |
0.44 |
0.21 |
40.00 |
50.00 |
60.00 |
40.00 |
40.00 |
NE |
156 |
0.19 |
0.00 |
63.46 |
74.36 |
91.03 |
8.97 |
8.97 |
unknown |
3 |
0.67 |
1.00 |
33.33 |
33.33 |
33.33 |
66.67 |
66.67 |
Indicator 1 by country and global IUCN
# plot
indicators_averaged_one %>%
filter(!is.na(regional_redlist)) %>%
# plot
ggplot(aes(x=global_IUCN, y=indicator1_mean, fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(indicators_averaged_one$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
ggtitle("global IUCN Redlist") +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=13)) +
facet_wrap(~country_assessment, ncol = 3) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 342 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 342 rows containing missing values (`geom_point()`).
(b) Proportion of Maintained Populations and red list status?
Plot indicator 2 by global IUCN in the entire dataset:
## Global IUCN
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(global_IUCN)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("CR", unique(df$myaxis), value = TRUE),
grep("EN", unique(df$myaxis), value = TRUE),
grep("VU", unique(df$myaxis), value = TRUE),
grep("NT", unique(df$myaxis), value = TRUE),
grep("LC", unique(df$myaxis), value = TRUE),
grep("DD", unique(df$myaxis), value = TRUE),
grep("NE", unique(df$myaxis), value = TRUE),
grep("unknown", unique(df$myaxis), value = TRUE)))
# plot
p2<-df %>%
ggplot(aes(x=myaxis, y=indicator2 , fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(df$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=15))
p2
## Warning: Removed 2 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
Summary table:
x <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>%
summarize(n=n(),
mean=mean(indicator2_mean),
median=median(indicator2_mean),
per.0=sum(indicator2_mean==0) / n *100,
per.below.25=sum(indicator2_mean<0.25) / n *100,
per.below.90=sum(indicator2_mean<0.90) / n *100,
per.above.75=sum(indicator2_mean>0.75)/ n *100,
per1=sum(indicator2_mean==1) / n *100)
kable(x, digits=2)
CR |
36 |
0.83 |
1.00 |
0.00 |
5.56 |
36.11 |
75.00 |
63.89 |
EN |
59 |
0.79 |
0.86 |
0.00 |
1.69 |
50.85 |
61.02 |
49.15 |
VU |
65 |
0.78 |
0.91 |
1.54 |
3.08 |
49.23 |
60.00 |
44.62 |
NT |
42 |
0.83 |
1.00 |
0.00 |
4.76 |
38.10 |
71.43 |
57.14 |
LC |
152 |
0.84 |
1.00 |
0.66 |
3.29 |
34.21 |
72.37 |
61.18 |
DD |
9 |
0.71 |
0.83 |
0.00 |
0.00 |
66.67 |
66.67 |
33.33 |
NE |
153 |
0.84 |
0.95 |
0.65 |
1.96 |
40.52 |
69.93 |
48.37 |
unknown |
2 |
1.00 |
1.00 |
0.00 |
0.00 |
0.00 |
100.00 |
100.00 |
Indicator 2 by country and global IUCN
# plot
indicators_averaged_one %>%
filter(!is.na(regional_redlist)) %>%
# plot
ggplot(aes(x=global_IUCN, y=indicator2_mean, fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(indicators_averaged_one$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
ggtitle("global IUCN Redlist") +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=13)) +
facet_wrap(~country_assessment, ncol = 3) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 390 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 390 rows containing missing values (`geom_point()`).
Main Figure 5: Single plot 2 pannels IUCN redlist and indicator
range values
plot_grid(p2,
p1,
ncol=1, align = "v", labels=c("A)", "B)"))
## Warning: Removed 2 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
ggsave("Fig5.pdf", width = 20, height = 25 , units = "cm")
Indicator values by taxonomic group
All the following plots and analyses consider the average of
multiassessed species (variable _mean), so that they are shown only
once.
We also grouped taxa with small n (<5) into “others”, according to
the following table:
table(indicators_averaged_one$taxonomic_group)
##
## amphibian bird fish invertebrate mammal
## 56 167 62 135 135
## reptile angiosperm bryophyte gymnosperm pteridophytes
## 70 235 5 19 14
## fungus other
## 3 18
They are grouped along with “other” in a new category “others” in the
new variable taxonomic_group_simplified
:
indicators_averaged_one <- indicators_averaged_one %>%
ungroup() %>%
mutate(taxonomic_group_simplified = case_when(
# if the taxon group is in the list of groups with small n change to "others"
as.character(taxonomic_group) %!in% c("bryophyte", "fungus", "other") ~ as.character(taxonomic_group),
TRUE ~ "others"))
# check:
table(indicators_averaged_one$taxonomic_group_simplified)
##
## amphibian angiosperm bird fish gymnosperm
## 56 235 167 62 19
## invertebrate mammal others pteridophytes reptile
## 135 135 26 14 70
We also create a group of only 3 categories for animals, plants and
others:
# Define the grouping map
grouping_map <- c(
"amphibian", "bird", "fish", "invertebrate", "mammal",
"angiosperm", "gymnosperm", "reptile", "pteridophytes", "others"
)
# Create a new variable taxonomic_group_3
indicators_averaged_one <- indicators_averaged_one %>%
mutate(
taxonomic_group_3 = case_when(
taxonomic_group_simplified %in% grouping_map[1:5] ~ "animals",
taxonomic_group_simplified %in% grouping_map[6:9] ~ "plants",
taxonomic_group_simplified %in% grouping_map[10] ~ "others",
TRUE ~ NA_character_
)
)
# reorder levels
indicators_averaged_one$taxonomic_group_3<- factor(indicators_averaged_one$taxonomic_group_3,
levels=c("animals", "plants", "others"))
Violin plots, histograms and summary tables for each indicator by
taxonomic group
Indicator Ne > 500
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
group_by(taxonomic_group_simplified) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(taxonomic_group_simplified, " (n= ", num, ")"))
## Joining, by = "taxonomic_group_simplified"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("amphibian", unique(df$myaxis), value = TRUE),
grep("bird" , unique(df$myaxis), value = TRUE),
grep("fish" , unique(df$myaxis), value = TRUE),
grep("invertebrate", unique(df$myaxis), value = TRUE),
grep("mammal", unique(df$myaxis), value = TRUE),
grep("reptile", unique(df$myaxis), value = TRUE),
grep("angiosperm", unique(df$myaxis), value = TRUE),
grep("gymnosperm", unique(df$myaxis), value = TRUE),
grep("pteridophytes", unique(df$myaxis), value = TRUE),
grep("others" , unique(df$myaxis), value = TRUE)))
df$taxonomic_group_simplified<-factor(df$taxonomic_group_simplified,
levels=c("amphibian", "bird" , "fish" , "invertebrate", "mammal", "reptile",
"angiosperm", "gymnosperm", "pteridophytes",
"others"))
# plot
p1<-df %>%
ggplot(aes(x=myaxis, y=indicator1_mean, fill=taxonomic_group_simplified, color=taxonomic_group_simplified)) +
geom_violin(width=1.5, linewidth = 0.2) +
geom_jitter(size=.7, width = 0.1, color="black") +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
coord_flip() +
scale_x_discrete(limits=rev) +
scale_fill_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
scale_color_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
text= element_text(size=15))
p1
## Warning: `position_dodge()` requires non-overlapping x intervals
Table with sampling size, mean indicator value and proporiton of taxa
where the value is below 0.25, 0.50 and 0.75:
#summary table by taxonomic group
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(taxonomic_group_simplified)) %>%
group_by(taxonomic_group_simplified) %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
n.below.75=sum(indicator1_mean<0.75),
n.below.50=sum(indicator1_mean<0.50),
n.below.25=sum(indicator1_mean<0.25),
per.below.25=n.below.25/n*100,
per.below.50=n.below.50/n*100)
# Calculate total counts and means
total_counts <- summarise(x,
taxonomic_group_simplified = "ALL",
n = sum(n),
mean= mean(mean),
median=median(median),
n.below.75 = sum(n.below.75),
n.below.50 = sum(n.below.50),
n.below.25 = sum(n.below.25),
per.below.25 = n.below.25 / n * 100,
per.below.50 = n.below.50 / n * 100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_simplified<-factor(summary_table$taxonomic_group_simplified,
levels = c("amphibian", "bird" , "fish" , "invertebrate", "mammal",
"angiosperm", "gymnosperm", "reptile", "pteridophytes",
"others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_simplified)
# show nice table
kable(summary_table, digits=2)
amphibian |
26 |
0.16 |
0.00 |
25 |
21 |
19 |
73.08 |
80.77 |
bird |
91 |
0.32 |
0.00 |
66 |
60 |
58 |
63.74 |
65.93 |
fish |
34 |
0.39 |
0.20 |
25 |
20 |
18 |
52.94 |
58.82 |
invertebrate |
65 |
0.28 |
0.00 |
51 |
45 |
44 |
67.69 |
69.23 |
mammal |
96 |
0.42 |
0.08 |
62 |
54 |
50 |
52.08 |
56.25 |
angiosperm |
188 |
0.18 |
0.00 |
170 |
154 |
140 |
74.47 |
81.91 |
gymnosperm |
15 |
0.16 |
0.00 |
13 |
13 |
12 |
80.00 |
86.67 |
reptile |
32 |
0.29 |
0.00 |
24 |
23 |
21 |
65.62 |
71.88 |
pteridophytes |
11 |
0.18 |
0.00 |
11 |
8 |
8 |
72.73 |
72.73 |
others |
10 |
0.15 |
0.00 |
9 |
8 |
8 |
80.00 |
80.00 |
ALL |
568 |
0.25 |
0.00 |
456 |
406 |
378 |
66.55 |
71.48 |
Indicator Proportion of maintained populations:
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
group_by(taxonomic_group_simplified) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(taxonomic_group_simplified, " (n= ", num, ")"))
## Joining, by = "taxonomic_group_simplified"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("amphibian", unique(df$myaxis), value = TRUE),
grep("bird" , unique(df$myaxis), value = TRUE),
grep("fish" , unique(df$myaxis), value = TRUE),
grep("invertebrate", unique(df$myaxis), value = TRUE),
grep("mammal", unique(df$myaxis), value = TRUE),
grep("reptile", unique(df$myaxis), value = TRUE),
grep("angiosperm", unique(df$myaxis), value = TRUE),
grep("gymnosperm", unique(df$myaxis), value = TRUE),
grep("pteridophytes", unique(df$myaxis), value = TRUE),
grep("others" , unique(df$myaxis), value = TRUE)))
df$taxonomic_group_simplified<-factor(df$taxonomic_group_simplified,
levels=c("amphibian", "bird" , "fish" , "invertebrate", "mammal", "reptile",
"angiosperm", "gymnosperm", "pteridophytes",
"others"))
# plot
p2<-df %>%
ggplot(aes(x=myaxis, y=indicator2_mean, fill=taxonomic_group_simplified, color=taxonomic_group_simplified)) +
geom_violin(width=1, linewidth = 0.2) +
geom_jitter(size=.7, width = 0.1, color="black") +
xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
coord_flip() +
scale_x_discrete(limits=rev) +
scale_fill_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
scale_color_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
text= element_text(size=15))
p2
Table with sampling size, mean indicator value and proporiton of taxa
where the value is below 0.25, 0.50 and 0.75:
# summary table for taxonomic group:
x <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(taxonomic_group_simplified)) %>%
group_by(taxonomic_group_simplified) %>%
summarize(n=n(),
mean=mean(indicator2_mean),
median=median(indicator2_mean),
n.below.75=sum(indicator2_mean<0.75),
n.below.50=sum(indicator2_mean<0.50),
n.below.25=sum(indicator2_mean<0.25),
per.below.25=n.below.25/n*100,
per.below.50=n.below.50/n*100)
# Calculate total counts and means
total_counts <- summarise(x,
taxonomic_group_simplified = "ALL",
n = sum(n),
mean = mean(mean),
median = median(median),
n.below.75 = sum(n.below.75),
n.below.50 = sum(n.below.50),
n.below.25 = sum(n.below.25),
per.below.25 = n.below.25 / n * 100,
per.below.50 = n.below.50 / n * 100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_simplified<-factor(summary_table$taxonomic_group_simplified,
levels = c("amphibian", "bird" , "fish" , "invertebrate", "mammal",
"angiosperm", "gymnosperm", "reptile", "pteridophytes",
"others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_simplified)
# show nice table
kable(summary_table, digits=2)
amphibian |
43 |
0.85 |
1.00 |
9 |
4 |
1 |
2.33 |
9.30 |
bird |
68 |
0.79 |
1.00 |
25 |
9 |
2 |
2.94 |
13.24 |
fish |
40 |
0.78 |
0.86 |
17 |
3 |
1 |
2.50 |
7.50 |
invertebrate |
77 |
0.67 |
0.67 |
40 |
21 |
7 |
9.09 |
27.27 |
mammal |
80 |
0.94 |
1.00 |
8 |
3 |
0 |
0.00 |
3.75 |
angiosperm |
139 |
0.83 |
1.00 |
36 |
13 |
4 |
2.88 |
9.35 |
gymnosperm |
9 |
0.97 |
1.00 |
0 |
0 |
0 |
0.00 |
0.00 |
reptile |
35 |
0.90 |
1.00 |
5 |
2 |
0 |
0.00 |
5.71 |
pteridophytes |
8 |
0.82 |
1.00 |
3 |
1 |
0 |
0.00 |
12.50 |
others |
19 |
0.82 |
0.88 |
6 |
1 |
0 |
0.00 |
5.26 |
ALL |
518 |
0.84 |
1.00 |
149 |
57 |
15 |
2.90 |
11.00 |
Histograms and summary tables by 3 taxonomic groups (animals,
plants, others)
By animals, plants, others:
# Create a histogram
hist_p1 <- indicators_averaged_one %>%
ggplot(aes(x = indicator1_mean, fill = taxonomic_group_3)) +
geom_histogram( bins = 25, color="white") + # Adjust the number of bins as needed
labs(x = "Proportion of populations within species with Ne>500 \n(Ne 500 indicator)", y = "Frequency") +
scale_fill_manual(
values = grouped_taxon_colors, # Custom colors for animals, plants, and others
breaks = c("animals", "plants", "others"),
name = "Taxonomic Group")+
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15),
legend.position = "right") +
guides(fill = guide_legend(title = NULL))
# plot
hist_p1
## Warning: Removed 351 rows containing non-finite values (`stat_bin()`).
Summary table for Ne indicator 3 taxonomic groups:
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
group_by(taxonomic_group_3) %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
# Calculate total counts and means
total_counts <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
ungroup() %>%
summarize(taxonomic_group_3 = "ALL",
n= n(),
mean = mean(indicator1_mean),
median = median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_3<-factor(summary_table$taxonomic_group_3,
levels = c("animals", "plants", "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_3)
kable(summary_table, digits=2)
animals |
312 |
0.34 |
0 |
54.49 |
60.58 |
74.04 |
26.28 |
25.96 |
plants |
246 |
0.19 |
0 |
61.79 |
73.58 |
90.24 |
10.57 |
9.76 |
others |
10 |
0.15 |
0 |
80.00 |
80.00 |
90.00 |
10.00 |
10.00 |
ALL |
568 |
0.27 |
0 |
58.10 |
66.55 |
81.34 |
19.19 |
18.66 |
PM Histogram for animal, plants, others:
# Create a histogram
hist_p2 <- indicators_averaged_one %>%
ggplot(aes(x = indicator2_mean, fill = taxonomic_group_3)) +
geom_histogram(bins = 25, color="white") + # Adjust the number of bins as needed
labs(x = "Proportion of populations maintained within species \n(PM indicator)", y = "Frequency") +
scale_fill_manual(
values = grouped_taxon_colors, # Custom colors for animals, plants, and others
breaks = c("animals", "plants", "others"),
name = "Taxonomic Group")+
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15)) +
guides(fill = guide_legend(title = NULL))
# plot
hist_p2
## Warning: Removed 401 rows containing non-finite values (`stat_bin()`).
Summary table for PM indicator 3 taxonomic groups
x <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
group_by(taxonomic_group_3) %>%
summarize(n=n(),
mean=mean(indicator2_mean),
median=median(indicator2_mean),
per0=sum(indicator2_mean==0) / n *100,
per.below.25=sum(indicator2_mean<0.25) / n *100,
per.below.90=sum(indicator2_mean<0.90) / n *100,
per.above.75=sum(indicator2_mean>0.75) / n *100,
per1=sum(indicator2_mean==1) / n *100)
# Calculate total counts and means
total_counts <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
ungroup() %>%
summarize(taxonomic_group_3 = "ALL",
n= n(),
mean = mean(indicator2_mean),
median = median(indicator2_mean),
per0=sum(indicator2_mean==0) / n *100,
per.below.25=sum(indicator2_mean<0.25) / n *100,
per.below.90=sum(indicator2_mean<0.90) / n *100,
per.above.75=sum(indicator2_mean>0.75) / n *100,
per1=sum(indicator2_mean==1) / n *100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_3<-factor(summary_table$taxonomic_group_3,
levels = c("animals", "plants", "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_3)
kable(summary_table, digits=2)
animals |
308 |
0.81 |
1.00 |
0.65 |
3.57 |
42.21 |
65.91 |
53.57 |
plants |
191 |
0.85 |
1.00 |
0.52 |
2.09 |
37.17 |
74.35 |
56.02 |
others |
19 |
0.82 |
0.88 |
0.00 |
0.00 |
52.63 |
63.16 |
26.32 |
ALL |
518 |
0.82 |
1.00 |
0.58 |
2.90 |
40.73 |
68.92 |
53.47 |
Main Figure 4: Single figure 4 panels for violin plots and
histograms for both indicators by taxonomic group
plot_grid(p2, hist_p2,
p1, hist_p1,
ncol=2, align = "v", labels=c("A)", "B)", "C)", "D)"))
## Warning: Removed 401 rows containing non-finite values (`stat_bin()`).
## Warning: `position_dodge()` requires non-overlapping x intervals
## Warning: Removed 351 rows containing non-finite values (`stat_bin()`).
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
ggsave("Fig4.pdf", width = 37, height = 25 , units = "cm")
Supplementary Figure S2: Values of indicator 1 and indicator 2 for
multiassessed taxa (alternative assessments)
#subset only with taxa assessed multiple times:
only_multi<-indicators_full %>%
filter(multiassessment=="multiassessment")
First, check how indicator 1 changes across the multiassessments.
p1<-only_multi %>%
# Keep rows with different values in indicator1 within each taxon group
group_by(taxon) %>%
filter(n_distinct(indicator1) > 1) %>%
# plot
ggplot(aes(x=taxon, y=indicator1)) +
geom_line(colour="darkgrey") +
geom_point(aes(color=country_assessment)) +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
labs(color="country") +
ylim(0, 1)+
coord_flip() +
theme_light() +
theme(axis.text.y = element_text(face = "italic"), panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p1
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).
Now check how Proportion of maintained populations (indicator 2)
changes across the multiassessments.
p2<-only_multi %>%
# Keep rows with different values in indicator1 within each taxon group
group_by(taxon) %>%
filter(n_distinct(indicator2) > 1) %>%
ggplot(aes(x=taxon, y=indicator2)) +
geom_line(colour="darkgrey") +
geom_point(aes(color=country_assessment)) +
scale_color_manual(values= scales::hue_pal()(4)[2:4]) + # last 3 colors to make them the same than the other plot
xlab("") + ylab("Proportion of populations maintained within species \n(PM indicator)") +
labs(color="country") +
coord_flip() +
theme_light() +
theme(axis.text.y = element_text(face = "italic"), panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p2
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 5 rows containing missing values (`geom_point()`).
Plot together:
plot_grid(p2, p1,
rel_heights = c(1.4, 0.8),
ncol=1, labels=c("A)", "B)"))
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 5 rows containing missing values (`geom_point()`).
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave("FigS2.pdf", width = 20, height = 25 , units = "cm")
Indicator 3 (number of species with genetic diversity
monitoring)
Indicator 3 refers to the number (count) of taxa by country in which
genetic monitoring is occurring. This is stored in the variable
temp_gen_monitoring as a “yes/no” answer for each taxon.
indicator3
Plot by global IUCN redlist status
# desired order of levels
ind3_data$global_IUCN<-factor(as.factor(indicators_full$global_IUCN), levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
## plot
ind3_data %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, temp_gen_monitoring, global_IUCN) %>%
filter(!duplicated(.)) %>%
# count "yes" in tem_gen_monitoring by country
filter(temp_gen_monitoring=="yes") %>%
ggplot(aes(x=country_assessment, fill=global_IUCN)) +
geom_bar() +
xlab("") + ylab("Number of taxa with temporal genetic diversity monitoring") +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=levels(as.factor(indicators_full$global_IUCN))) +
theme_light()
Supplementary Figure S9: Sankey plot genetic data availability
Relatively few taxa have genetic monitoring, but many have some sort
of genetic study. Let’s check that with a Sankey Plot:
# check original n of assessments
nrow(ind3_data)
## [1] 966
# first subset the ind3_data keeping only taxa assessed a single time, plust the first record of those assessed multiple times.
ind3_data_firstmulti<-ind3_data[!duplicated(cbind(ind3_data$taxon, ind3_data$country_assessment)), ]
# n taxa
nrow(ind3_data_firstmulti)
## [1] 919
# transform data to how ggsankey wants it
df <- ind3_data_firstmulti %>%
make_long(country_assessment, temp_gen_monitoring, gen_studies)
# plot
ggplot(df, aes(x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = factor(node),
label = node)) +
geom_sankey(flow.alpha = 0.5,
show.legend = FALSE) +
geom_sankey_label(size = 2.5, color = "black", fill = "white") +
theme_sankey(base_size = 10) +
# manually set flow fill according to desired color
# countries
scale_fill_manual(values=c(scales::hue_pal()(length(unique(ind3_data_firstmulti$country_assessment))),
# traffic light for monitoring
c("darkolivegreen", "brown3", "darkgrey"),
# nice soft colors for gen_studies
c("grey50", "grey35", "grey50", "brown3")),
breaks=c(unique(ind3_data_firstmulti$country_assessment),
unique(ind3_data_firstmulti$temp_gen_monitoring),
unique(ind3_data_firstmulti$gen_studies))) +
xlab("")
## Warning: Removed 2 rows containing missing values (`geom_label()`).
ggsave("FigS9.pdf", width = 20, height = 18 , units = "cm")
## Warning: Removed 2 rows containing missing values (`geom_label()`).
table(ind3_data_firstmulti$gen_studies)
##
## no phylo phylo_pop pop
## 375 190 244 99
Count data:
ind3_data %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, gen_studies, temp_gen_monitoring) %>%
filter(!duplicated(.)) %>%
group_by(country_assessment, temp_gen_monitoring, gen_studies) %>%
summarise(n_studies=n())
## `summarise()` has grouped output by 'country_assessment',
## 'temp_gen_monitoring'. You can override using the `.groups` argument.
How many genetic studies ara available by country for species without
temporal genetic diversity monitoring?
## plot
ind3_data %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, temp_gen_monitoring, gen_studies) %>%
filter(!duplicated(.)) %>%
# keep only taxa without gen div monitoring
filter(temp_gen_monitoring=="no")%>%
ggplot(aes(x=country_assessment, fill=gen_studies)) +
geom_bar() +
scale_fill_manual(values=c("grey80", scales::hue_pal()(3)))+
xlab("") +
theme_light()
Summary table of mean indicator values and n
The tables below show the indicator values and sampling size
averaging them by country, taxonomic group, distribution type or IUCN
global red list status. For this summary the mean of the multiassessed
species was considering and counted as a single entry for the sampling
size.
Codes for indicator names:
- PM.ind: Proportion of Mantained populations
indicator (indicator 2)
- Ne.ind: Proportion of populations where Ne>500
indicator (indicator 1)
- Mon.ind: Number of species where genetic diversity
monitoring is taking place (indicator 3)
Codes for summary stats:
- n: sampling size (number of taxa assessed) without
missing data
- mean: mean value for the indicator value
- sd: standar deviation for the indicator value
Summary stats by country:
x<-indicators_averaged_one %>%
group_by(country_assessment) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE))
# nice table
kable(x, digits=3)
Australia |
28 |
0.903 |
0.178 |
47 |
0.170 |
0.299 |
Belgium |
27 |
0.453 |
0.221 |
101 |
0.246 |
0.381 |
Colombia |
22 |
0.601 |
0.174 |
43 |
0.326 |
0.474 |
France |
34 |
0.854 |
0.278 |
55 |
0.416 |
0.471 |
Japan |
50 |
0.925 |
0.152 |
50 |
0.077 |
0.180 |
Mexico |
28 |
0.936 |
0.135 |
47 |
0.217 |
0.354 |
South Africa |
90 |
0.948 |
0.155 |
61 |
0.422 |
0.475 |
Sweden |
120 |
0.777 |
0.271 |
83 |
0.188 |
0.331 |
USA |
117 |
0.794 |
0.244 |
79 |
0.354 |
0.410 |
Taxonomic groups
Summary stats by taxonomic group:
x<-indicators_averaged_one %>%
group_by(taxonomic_group) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE))
# nice table
kable(x, digits=3)
amphibian |
43 |
0.833 |
0.244 |
26 |
0.150 |
0.250 |
bird |
67 |
0.789 |
0.265 |
91 |
0.321 |
0.445 |
fish |
40 |
0.768 |
0.245 |
34 |
0.414 |
0.448 |
invertebrate |
77 |
0.671 |
0.309 |
65 |
0.277 |
0.403 |
mammal |
80 |
0.937 |
0.161 |
95 |
0.419 |
0.461 |
reptile |
35 |
0.902 |
0.176 |
31 |
0.288 |
0.437 |
angiosperm |
138 |
0.834 |
0.242 |
188 |
0.177 |
0.311 |
bryophyte |
4 |
0.688 |
0.252 |
2 |
0.250 |
0.354 |
gymnosperm |
9 |
0.975 |
0.050 |
15 |
0.161 |
0.353 |
pteridophytes |
8 |
0.824 |
0.251 |
11 |
0.179 |
0.284 |
fungus |
3 |
0.903 |
0.167 |
2 |
0.500 |
0.707 |
other |
12 |
0.844 |
0.141 |
6 |
0.000 |
0.000 |
Detailed table:
x<-indicators_averaged_one %>%
group_by(country_assessment, taxonomic_group) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
Australia |
amphibian |
0 |
NaN |
NA |
1 |
0.000 |
NA |
Australia |
bird |
9 |
1.000 |
0.000 |
9 |
0.167 |
0.264 |
Australia |
fish |
1 |
1.000 |
NA |
2 |
0.500 |
0.707 |
Australia |
invertebrate |
1 |
0.500 |
NA |
0 |
NaN |
NA |
Australia |
mammal |
3 |
0.750 |
0.250 |
10 |
0.303 |
0.359 |
Australia |
reptile |
7 |
0.958 |
0.078 |
5 |
0.050 |
0.112 |
Australia |
angiosperm |
2 |
0.700 |
0.424 |
15 |
0.115 |
0.276 |
Australia |
bryophyte |
0 |
NaN |
NA |
1 |
0.500 |
NA |
Australia |
gymnosperm |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
Australia |
pteridophytes |
0 |
NaN |
NA |
1 |
0.000 |
NA |
Australia |
other |
5 |
0.887 |
0.141 |
1 |
0.000 |
NA |
Belgium |
amphibian |
3 |
0.310 |
0.170 |
9 |
0.186 |
0.330 |
Belgium |
fish |
5 |
0.570 |
0.153 |
9 |
0.206 |
0.352 |
Belgium |
invertebrate |
10 |
0.444 |
0.259 |
30 |
0.323 |
0.416 |
Belgium |
mammal |
3 |
0.444 |
0.192 |
19 |
0.447 |
0.497 |
Belgium |
reptile |
0 |
NaN |
NA |
4 |
0.030 |
0.026 |
Belgium |
angiosperm |
5 |
0.446 |
0.279 |
26 |
0.093 |
0.219 |
Belgium |
bryophyte |
1 |
0.444 |
NA |
1 |
0.000 |
NA |
Belgium |
gymnosperm |
0 |
NaN |
NA |
1 |
0.050 |
NA |
Belgium |
pteridophytes |
0 |
NaN |
NA |
2 |
0.250 |
0.354 |
Colombia |
amphibian |
2 |
0.625 |
0.177 |
0 |
NaN |
NA |
Colombia |
bird |
19 |
0.604 |
0.181 |
31 |
0.419 |
0.502 |
Colombia |
fish |
0 |
NaN |
NA |
2 |
0.500 |
0.707 |
Colombia |
mammal |
1 |
0.500 |
NA |
1 |
0.000 |
NA |
Colombia |
reptile |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
Colombia |
angiosperm |
0 |
NaN |
NA |
6 |
0.000 |
0.000 |
Colombia |
other |
0 |
NaN |
NA |
1 |
0.000 |
NA |
France |
amphibian |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
France |
bird |
11 |
0.852 |
0.259 |
20 |
0.342 |
0.460 |
France |
fish |
1 |
0.167 |
NA |
6 |
0.589 |
0.463 |
France |
invertebrate |
3 |
0.700 |
0.265 |
7 |
0.405 |
0.508 |
France |
mammal |
11 |
0.955 |
0.151 |
10 |
0.217 |
0.416 |
France |
reptile |
1 |
1.000 |
NA |
2 |
0.500 |
0.707 |
France |
angiosperm |
3 |
0.667 |
0.577 |
6 |
0.583 |
0.492 |
France |
gymnosperm |
1 |
1.000 |
NA |
2 |
1.000 |
0.000 |
France |
fungus |
1 |
1.000 |
NA |
1 |
1.000 |
NA |
France |
other |
1 |
0.900 |
NA |
0 |
NaN |
NA |
Japan |
angiosperm |
39 |
0.931 |
0.130 |
39 |
0.061 |
0.148 |
Japan |
gymnosperm |
4 |
1.000 |
0.000 |
4 |
0.000 |
0.000 |
Japan |
pteridophytes |
7 |
0.847 |
0.262 |
7 |
0.210 |
0.316 |
Mexico |
amphibian |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
Mexico |
bird |
1 |
0.667 |
NA |
2 |
0.500 |
0.707 |
Mexico |
fish |
0 |
NaN |
NA |
0 |
NaN |
NA |
Mexico |
invertebrate |
1 |
1.000 |
NA |
0 |
NaN |
NA |
Mexico |
mammal |
3 |
0.867 |
0.231 |
3 |
0.000 |
0.000 |
Mexico |
reptile |
1 |
1.000 |
NA |
4 |
0.500 |
0.577 |
Mexico |
angiosperm |
20 |
0.959 |
0.120 |
29 |
0.236 |
0.339 |
Mexico |
gymnosperm |
2 |
0.886 |
0.005 |
6 |
0.061 |
0.148 |
Mexico |
pteridophytes |
0 |
NaN |
NA |
1 |
0.000 |
NA |
South Africa |
amphibian |
18 |
0.918 |
0.173 |
4 |
0.125 |
0.250 |
South Africa |
bird |
11 |
1.000 |
0.000 |
11 |
0.327 |
0.467 |
South Africa |
fish |
9 |
1.000 |
0.000 |
4 |
0.297 |
0.477 |
South Africa |
invertebrate |
0 |
NaN |
NA |
0 |
NaN |
NA |
South Africa |
mammal |
32 |
0.992 |
0.044 |
31 |
0.608 |
0.480 |
South Africa |
reptile |
7 |
0.869 |
0.254 |
1 |
1.000 |
NA |
South Africa |
angiosperm |
12 |
0.833 |
0.277 |
10 |
0.060 |
0.190 |
South Africa |
gymnosperm |
1 |
1.000 |
NA |
0 |
NaN |
NA |
Sweden |
amphibian |
13 |
0.891 |
0.183 |
9 |
0.192 |
0.219 |
Sweden |
bird |
11 |
0.696 |
0.385 |
9 |
0.111 |
0.333 |
Sweden |
fish |
7 |
0.738 |
0.290 |
4 |
0.299 |
0.476 |
Sweden |
invertebrate |
29 |
0.674 |
0.292 |
20 |
0.078 |
0.225 |
Sweden |
mammal |
20 |
0.986 |
0.047 |
15 |
0.361 |
0.447 |
Sweden |
reptile |
7 |
0.983 |
0.045 |
3 |
0.619 |
0.541 |
Sweden |
angiosperm |
22 |
0.622 |
0.259 |
18 |
0.159 |
0.258 |
Sweden |
bryophyte |
2 |
0.904 |
0.048 |
0 |
NaN |
NA |
Sweden |
pteridophytes |
1 |
0.667 |
NA |
0 |
NaN |
NA |
Sweden |
fungus |
2 |
0.855 |
0.205 |
1 |
0.000 |
NA |
Sweden |
other |
6 |
0.800 |
0.153 |
4 |
0.000 |
0.000 |
USA |
amphibian |
6 |
0.754 |
0.267 |
0 |
NaN |
NA |
USA |
bird |
5 |
0.741 |
0.205 |
9 |
0.254 |
0.375 |
USA |
fish |
17 |
0.737 |
0.198 |
7 |
0.615 |
0.448 |
USA |
invertebrate |
33 |
0.730 |
0.324 |
8 |
0.492 |
0.471 |
USA |
mammal |
7 |
0.905 |
0.194 |
6 |
0.303 |
0.351 |
USA |
reptile |
12 |
0.823 |
0.202 |
10 |
0.271 |
0.444 |
USA |
angiosperm |
35 |
0.867 |
0.181 |
39 |
0.332 |
0.398 |
USA |
bryophyte |
1 |
0.500 |
NA |
0 |
NaN |
NA |
USA |
gymnosperm |
1 |
1.000 |
NA |
0 |
NaN |
NA |
IUCN
Summary stats:
x<-indicators_averaged_one %>%
group_by(global_IUCN) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE))
# nice table
kable(x, digits=3)
CR |
36 |
0.825 |
0.272 |
46 |
0.109 |
0.284 |
EN |
59 |
0.786 |
0.254 |
48 |
0.260 |
0.415 |
VU |
64 |
0.778 |
0.253 |
66 |
0.310 |
0.414 |
NT |
42 |
0.821 |
0.263 |
50 |
0.237 |
0.375 |
LC |
152 |
0.845 |
0.251 |
185 |
0.365 |
0.436 |
DD |
9 |
0.707 |
0.313 |
10 |
0.442 |
0.490 |
NE |
152 |
0.833 |
0.235 |
156 |
0.187 |
0.329 |
unknown |
2 |
1.000 |
0.000 |
3 |
0.667 |
0.577 |
NA |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
Detailed table by IUCN category:
x<-indicators_averaged_one %>%
group_by(country_assessment, global_IUCN) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
Australia |
CR |
5 |
0.860 |
0.219 |
10 |
0.000 |
0.000 |
Australia |
EN |
4 |
0.850 |
0.300 |
7 |
0.167 |
0.264 |
Australia |
VU |
6 |
0.943 |
0.101 |
8 |
0.260 |
0.355 |
Australia |
NT |
4 |
1.000 |
0.000 |
5 |
0.353 |
0.328 |
Australia |
LC |
3 |
1.000 |
0.000 |
8 |
0.229 |
0.367 |
Australia |
NE |
6 |
0.822 |
0.202 |
9 |
0.128 |
0.329 |
Australia |
unknown |
0 |
NaN |
NA |
0 |
NaN |
NA |
Belgium |
CR |
1 |
0.333 |
NA |
2 |
0.500 |
0.707 |
Belgium |
EN |
1 |
0.455 |
NA |
1 |
0.000 |
NA |
Belgium |
VU |
3 |
0.548 |
0.410 |
3 |
0.333 |
0.577 |
Belgium |
NT |
2 |
0.310 |
0.034 |
13 |
0.030 |
0.058 |
Belgium |
LC |
19 |
0.466 |
0.215 |
64 |
0.285 |
0.398 |
Belgium |
DD |
1 |
0.333 |
NA |
3 |
0.364 |
0.553 |
Belgium |
NE |
0 |
NaN |
NA |
14 |
0.151 |
0.292 |
Belgium |
unknown |
0 |
NaN |
NA |
1 |
1.000 |
NA |
Colombia |
CR |
2 |
0.450 |
0.071 |
7 |
0.000 |
0.000 |
Colombia |
EN |
4 |
0.525 |
0.145 |
3 |
0.667 |
0.577 |
Colombia |
VU |
11 |
0.659 |
0.195 |
15 |
0.133 |
0.352 |
Colombia |
NT |
3 |
0.550 |
0.180 |
6 |
0.667 |
0.516 |
Colombia |
LC |
2 |
0.667 |
0.000 |
10 |
0.600 |
0.516 |
Colombia |
NA |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
France |
CR |
2 |
0.583 |
0.589 |
5 |
0.040 |
0.089 |
France |
EN |
1 |
1.000 |
NA |
3 |
0.333 |
0.577 |
France |
VU |
4 |
0.725 |
0.320 |
9 |
0.481 |
0.467 |
France |
NT |
7 |
0.839 |
0.277 |
6 |
0.333 |
0.516 |
France |
LC |
17 |
0.953 |
0.133 |
28 |
0.476 |
0.482 |
France |
DD |
0 |
NaN |
NA |
2 |
1.000 |
0.000 |
France |
NE |
3 |
0.633 |
0.551 |
2 |
0.000 |
0.000 |
Japan |
CR |
2 |
1.000 |
0.000 |
2 |
0.000 |
0.000 |
Japan |
EN |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
Japan |
LC |
3 |
1.000 |
0.000 |
3 |
0.021 |
0.036 |
Japan |
NE |
44 |
0.914 |
0.159 |
44 |
0.086 |
0.190 |
Mexico |
CR |
4 |
1.000 |
0.000 |
3 |
0.333 |
0.577 |
Mexico |
EN |
9 |
0.919 |
0.163 |
12 |
0.083 |
0.289 |
Mexico |
VU |
5 |
0.900 |
0.224 |
5 |
0.000 |
0.000 |
Mexico |
NT |
1 |
0.889 |
NA |
2 |
0.000 |
0.000 |
Mexico |
LC |
5 |
0.936 |
0.092 |
12 |
0.497 |
0.367 |
Mexico |
DD |
1 |
1.000 |
NA |
1 |
0.333 |
NA |
Mexico |
NE |
3 |
0.958 |
0.072 |
12 |
0.158 |
0.318 |
South Africa |
CR |
14 |
0.860 |
0.285 |
12 |
0.042 |
0.144 |
South Africa |
EN |
16 |
0.895 |
0.182 |
9 |
0.467 |
0.469 |
South Africa |
VU |
14 |
0.982 |
0.067 |
12 |
0.500 |
0.522 |
South Africa |
NT |
8 |
0.969 |
0.088 |
8 |
0.253 |
0.356 |
South Africa |
LC |
34 |
1.000 |
0.000 |
18 |
0.667 |
0.485 |
South Africa |
DD |
1 |
1.000 |
NA |
0 |
NaN |
NA |
South Africa |
NE |
2 |
0.750 |
0.354 |
1 |
0.000 |
NA |
South Africa |
unknown |
1 |
1.000 |
NA |
1 |
1.000 |
NA |
Sweden |
EN |
5 |
0.489 |
0.208 |
2 |
0.050 |
0.071 |
Sweden |
VU |
7 |
0.685 |
0.247 |
7 |
0.297 |
0.363 |
Sweden |
NT |
8 |
0.816 |
0.273 |
5 |
0.054 |
0.074 |
Sweden |
LC |
63 |
0.836 |
0.259 |
41 |
0.247 |
0.374 |
Sweden |
DD |
4 |
0.549 |
0.299 |
4 |
0.250 |
0.500 |
Sweden |
NE |
33 |
0.744 |
0.268 |
24 |
0.085 |
0.228 |
USA |
CR |
6 |
0.828 |
0.164 |
5 |
0.467 |
0.447 |
USA |
EN |
18 |
0.743 |
0.268 |
10 |
0.300 |
0.483 |
USA |
VU |
14 |
0.664 |
0.271 |
7 |
0.427 |
0.311 |
USA |
NT |
9 |
0.796 |
0.289 |
5 |
0.284 |
0.435 |
USA |
LC |
6 |
0.791 |
0.208 |
1 |
0.000 |
NA |
USA |
DD |
2 |
0.917 |
0.118 |
0 |
NaN |
NA |
USA |
NE |
61 |
0.829 |
0.234 |
50 |
0.365 |
0.415 |
USA |
unknown |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
Distribution type
Summary stats:
x<-indicators_averaged_one %>%
group_by(species_range) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE))
# nice table
kable(x, digits=3)
restricted |
309 |
0.795 |
0.266 |
313 |
0.187 |
0.344 |
unknown |
14 |
0.760 |
0.259 |
20 |
0.300 |
0.470 |
wide ranging |
193 |
0.867 |
0.217 |
231 |
0.384 |
0.432 |
NA |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
Detailed table by IUCN category:
x<-indicators_averaged_one %>%
group_by(country_assessment, species_range) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
Australia |
restricted |
14 |
0.865 |
0.224 |
27 |
0.114 |
0.253 |
Australia |
unknown |
0 |
NaN |
NA |
1 |
0.000 |
NA |
Australia |
wide ranging |
14 |
0.942 |
0.110 |
19 |
0.260 |
0.347 |
Belgium |
restricted |
10 |
0.319 |
0.128 |
22 |
0.135 |
0.262 |
Belgium |
unknown |
2 |
0.456 |
0.062 |
5 |
0.000 |
0.000 |
Belgium |
wide ranging |
15 |
0.542 |
0.242 |
74 |
0.295 |
0.411 |
Colombia |
restricted |
16 |
0.614 |
0.193 |
28 |
0.286 |
0.460 |
Colombia |
unknown |
5 |
0.547 |
0.117 |
10 |
0.500 |
0.527 |
Colombia |
wide ranging |
1 |
0.667 |
NA |
3 |
0.333 |
0.577 |
Colombia |
NA |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
France |
restricted |
14 |
0.741 |
0.336 |
28 |
0.227 |
0.388 |
France |
wide ranging |
20 |
0.933 |
0.202 |
27 |
0.611 |
0.476 |
Japan |
restricted |
35 |
0.939 |
0.141 |
35 |
0.080 |
0.180 |
Japan |
unknown |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
Japan |
wide ranging |
14 |
0.884 |
0.179 |
14 |
0.076 |
0.192 |
Mexico |
restricted |
19 |
0.933 |
0.138 |
31 |
0.094 |
0.267 |
Mexico |
unknown |
2 |
1.000 |
0.000 |
0 |
NaN |
NA |
Mexico |
wide ranging |
7 |
0.926 |
0.150 |
16 |
0.456 |
0.385 |
South Africa |
restricted |
41 |
0.905 |
0.206 |
29 |
0.217 |
0.391 |
South Africa |
unknown |
2 |
1.000 |
0.000 |
1 |
1.000 |
NA |
South Africa |
wide ranging |
47 |
0.984 |
0.081 |
31 |
0.595 |
0.475 |
Sweden |
restricted |
71 |
0.708 |
0.292 |
53 |
0.076 |
0.210 |
Sweden |
unknown |
2 |
1.000 |
0.000 |
2 |
0.000 |
0.000 |
Sweden |
wide ranging |
47 |
0.871 |
0.204 |
28 |
0.415 |
0.408 |
USA |
restricted |
89 |
0.813 |
0.243 |
60 |
0.367 |
0.418 |
USA |
unknown |
0 |
NaN |
NA |
0 |
NaN |
NA |
USA |
wide ranging |
28 |
0.735 |
0.244 |
19 |
0.314 |
0.393 |
Simplified figures and basic stats for text summary and policy
brief
How many species and pops:
How many species:
nrow(indicators_averaged_one)
## [1] 919
How many assessments (including species assessed more than once):
nrow(indicators_full)
## [1] 966
How many populations, including all pops from species that were
assessed more than once:
nrow(ind1_data)
## [1] 5652
How many populations by country (including multiassesments):
npops<-ind1_data %>%
group_by(country_assessment) %>%
summarise(n_pops=n())
npops
mean(npops$n_pops)
## [1] 628
sd(npops$n_pops)
## [1] 590.631
How many populations, counting only once populations from taxa
assessed more than once:
# This looks for the id of the taxa keeping only 1 for the multiassessed taxa, and keeps those int he ind1_data (where the pops data is)
ind1_data_without_altassesments<-ind1_data[ind1_data$X_uuid %in% indicators_averaged_one$X_uuid, ]
# the number of rows is the number of pops counting only once multiassessed taxa
nrow(ind1_data_without_altassesments)
## [1] 5271
How many multiassesments:
sum(indicators_full$multiassessment=="multiassessment")
## [1] 91
Which taxa had multiassesments and how many:
x<- indicators_full %>% filter(multiassessment=="multiassessment") %>%
group_by(taxon) %>%
summarise(n=n())
kable(x)
Alasmidonta varicosa |
2 |
Alouatta palliata mexicana |
2 |
Ambystoma cingulatum |
2 |
Anguis fragilis |
2 |
Aphelocoma coerulescens |
3 |
Astragalus microcymbus |
2 |
Barbastella barbastellus |
2 |
Bombus terricola |
2 |
Cambarus elkensis |
2 |
Coronella austriaca |
2 |
Cryptobranchus alleganiensis alleganiensis |
2 |
Cryptomastix devia |
2 |
Erimystax harryi |
2 |
Etheostoma chienense |
2 |
Etheostoma osburni |
2 |
Hemphillia burringtoni |
2 |
Heterelmis stephani |
2 |
Hydroprogne caspia |
2 |
Lavinia exilicauda chi |
2 |
Lepidium papilliferum |
2 |
Mustela nigripes |
2 |
Necturus lewisi |
2 |
Nicrophorus americanus |
2 |
Notophthalmus perstriatus |
4 |
Notropis mekistocholas |
2 |
Notropis topeka |
2 |
Noturus munitus |
2 |
Obovaria subrotunda |
2 |
Oncorhynchus apache |
2 |
Oncorhynchus clarkii virginalis |
2 |
Phonotimpus talquian |
2 |
Pimelea spinescens subspecies spinescens |
2 |
Plestiodon egregius egregius |
2 |
Pleurobema rubrum |
2 |
Procambarus orcinus |
2 |
Pseudemys rubriventris |
2 |
Rana dalmatina |
2 |
Rhynchospora crinipes |
2 |
Streptanthus bracteatus |
2 |
Texella reyesi |
2 |
Thamnophis sirtalis tetrataenia |
2 |
Thoburnia atripinnis |
2 |
Toxolasma lividum |
2 |
Zapus hudsonius luteus |
2 |
How many taxa with multiassesments?
nrow(x)
## [1] 44
Fill gaps in Table S1
These numbers are not reported in the main text, but it is useful to
have them in Table S1 to better understand the dataset.
Number of taxa with No data on PM indicator in both
of the alternative assesments.
sum(is.na(indicators_averaged_one$indicator2_mean))
## [1] 401
Percentage out of 919:
sum(is.na(indicators_averaged_one$indicator2_mean))/nrow(indicators_averaged_one)
## [1] 0.4363439
Number of taxa with data on Ne 500 indicator (for at
least 1 pop) in both of the alternative assesments.
sum(!is.na(indicators_averaged_one$indicator1_mean))
## [1] 568
Percentage out of 919:
sum(!is.na(indicators_averaged_one$indicator1_mean))/nrow(indicators_averaged_one)
## [1] 0.6180631
Number of populations data, considering only one of the
alternative assessments
Number of pops:
nrow(ind1_data_without_altassesments)
## [1] 5271
df<-ind1_data_without_altassesments %>%
mutate(Ne_calculated_from = replace_na(Ne_calculated_from, "no data available")) %>%
group_by(Ne_calculated_from) %>%
summarise(n=n(),
percentage = (n / nrow(ind1_data_without_altassesments)) * 100)
kable(df, digits = 0)
genetic data |
222 |
4 |
NcPoint ratio |
1185 |
22 |
NcRange ratio |
2862 |
54 |
no data available |
1002 |
19 |
Plain Histogram and stats for Ne > 500 indicator
Plain histogram:
# Create a histogram
hist_p <- indicators_averaged_one %>%
ggplot(aes(x = indicator1_mean)) +
geom_histogram( bins = 25, fill="grey30") + # Adjust the number of bins as needed
labs(x = "Proportion of populations within species with Ne>500 \n(Ne 500 indicator)", y = "Frequency") +
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15)) +
guides(fill = guide_legend(title = NULL))
# plot
hist_p
## Warning: Removed 351 rows containing non-finite values (`stat_bin()`).
Summary stats for the Ne 500 indicator:
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
ungroup() %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
x
kable(x, digits = 2)
568 |
0.27 |
0 |
58.1 |
66.55 |
81.34 |
19.19 |
18.66 |
Data availability for the Ne indicator. At the species level:
sum(!is.na(indicators_averaged_one$indicator1_mean)) / nrow(indicators_averaged_one)
## [1] 0.6180631
At the population level:
sum(!is.na(ind1_data$Ne_combined)) / nrow(ind1_data)
## [1] 0.811925
Populations below the Ne 500 threshold
x<- ind1_data %>%
ungroup() %>%
summarise(n_pops = n(),
n_pops_Ne_data = sum(!is.na(Ne_combined)),
n_pops_more_500 = sum(Ne_combined >= 500, na.rm = TRUE),
n_pops_less_500 =sum(Ne_combined < 500, na.rm = TRUE),
per_less_500 = n_pops_less_500/n_pops_Ne_data)
kable(x, digits=2)
Plain Histogram and stats for Proportion Mantained populations
Plain histogram
# Create a histogram
hist_p <- indicators_averaged_one %>%
ggplot(aes(x = indicator2_mean)) +
geom_histogram(bins = 25, fill="grey30") + # Adjust the number of bins as needed
labs(x = "Proportion of populations maintained within species \n(PM indicator)", y = "Frequency") +
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15)) +
guides(fill = guide_legend(title = NULL))
# plot
hist_p
## Warning: Removed 401 rows containing non-finite values (`stat_bin()`).
Summary stats for the PM indicator:
x <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
ungroup() %>%
summarize(n=n(),
mean=mean(indicator2_mean),
median=median(indicator2_mean),
per0=sum(indicator2_mean==0) / n *100,
per.below.25=sum(indicator2_mean<0.25) / n *100,
per.below.90=sum(indicator2_mean<0.90) / n *100,
per.above.75=sum(indicator2_mean>0.75) / n *100,
per1=sum(indicator2_mean==1) / n *100)
kable(x, digits = 2)
518 |
0.82 |
1 |
0.58 |
2.9 |
40.73 |
68.92 |
53.47 |
Data availability, donuts and plot bars for Ne 500
Species level yes/no table with percentages for Ne 500 indicator
df<- indicators_full %>%
group_by(popsize_data) %>%
summarise(n=n(),
percentage = (n / nrow(metadata)) * 100)
kable(df, digits = 0)
data_for_species |
130 |
13 |
insuff_data_species |
216 |
22 |
yes |
613 |
63 |
NA |
7 |
1 |
Donut only available data
df<- indicators_full %>%
filter(popsize_data != "data_for_species") %>% # we want to show only data for pops or insufficient
group_by(popsize_data) %>%
summarise(n=n(),
percentage = (n / nrow(metadata)) * 100)
# variable to make change the size of the hole
hsize <- 2 # to change the size of the hole. larger=bigger
df <- df %>%
mutate(x = hsize)
# donut plot
p <- ggplot(df, aes(x = hsize, y = n, fill = popsize_data)) +
geom_col() +
coord_polar(theta = "y") +
scale_fill_manual(values=c("#2ca02c", "grey80"),
breaks=c("yes", "insuff_data_species"),
labels=c("Population level", "Insufficient data")) +
xlim(c(0.2, hsize + 0.5)) + theme_void()
p
Species level yes/no. Bar plot for Ne 500
indicators_full %>%
filter(popsize_data != "data_for_species") %>% # we want to show only data for pops or insufficient
ggplot(aes(x=country_assessment, fill = popsize_data)) +
geom_bar(position = "fill", color="white") +
scale_fill_manual(values=c("#2ca02c", "grey80"),
breaks=c("yes", "insuff_data_species"),
labels=c("Population level", "Insufficient data")) +
scale_x_discrete(limits=rev) + xlab("") + ylab("Data availability (% of species)") +
coord_flip() +
theme_light()
Population level, what kind? Table
# we first need the column numbers
df<-ind1_data %>%
mutate(Ne_calculated_from = replace_na(Ne_calculated_from, "no data available")) %>%
group_by(Ne_calculated_from) %>%
summarise(n=n(),
percentage = (n / nrow(ind1_data)) * 100)
kable(df, digits = 0)
genetic data |
349 |
6 |
NcPoint ratio |
1266 |
22 |
NcRange ratio |
2974 |
53 |
no data available |
1063 |
19 |
Donut
# variable to make change the size of the hole
hsize <- 3 # to change the size of the hole. larger=bigger
df <- df %>%
mutate(x = hsize)
# donut plot
p <- ggplot(df, aes(x = hsize, y = n, fill = Ne_calculated_from)) +
geom_col() +
coord_polar(theta = "y") +
scale_fill_manual(labels=c("genetic data", "NcPoint ratio", "NcRange ratio", "no data available"),
breaks=c("genetic data", "NcPoint ratio", "NcRange ratio", "no data available"),
values=c("darkgreen", "#0072B2", "#E69F00", "grey80")) +
xlim(c(0.2, hsize + 0.5)) + theme_void()
p
Data availability for PM indicator
Total taxa with NA in extinct populations:
sum(is.na(indicators_full$n_extint_populations))
## [1] 416
Percentage of missing data
sum(is.na(indicators_full$n_extint_populations))/nrow(indicators_full)
## [1] 0.4306418
Total taxa with data availability on extinct pops
sum(!is.na(indicators_full$n_extint_populations))
## [1] 550
Percentage of taxa with data availability on extinct pops (which also
includes NA for extant, see above)
sum(!is.na(indicators_full$n_extint_populations))/nrow(indicators_full)
## [1] 0.5693582
nrow(indicators_full)
## [1] 966
Data availability for at least one indicator
Data availability for at least one indicator. Including
multiassesments
# number
x<- indicators_full %>%
filter(popsize_data=="yes" | !is.na(n_extint_populations))
nrow(x)
## [1] 802
# percentage
nrow(x) / nrow(indicators_full)
## [1] 0.8302277
Data availability for at least one indicator. Keeping only one of the
multiassesments
# number
x<- indicators_averaged_one %>%
filter(popsize_data=="yes" | !is.na(n_extint_populations))
nrow(x)
## [1] 765
# percentage
nrow(x) / nrow(indicators_averaged_one)
## [1] 0.8324266
Exploring Ne 500 values if using a different Ne/Nc ratio
Because the Ne/Nc does not necessarely needs to be 0.1, wenterested
to know if the distribution of values shifts to the right (better
indicator status) if we used other ratio, like 0.2 or 0.3, which is a
better ratio for some taxa. This would show uncertainty and emphasize
more work to be done on the Ne/Nc ratio. Lastly it may help address
concerns that Nc 5000 is just infeasible, because there are outlier
species which never had Nc>5000, e.g. some endimic birds, but these
are cases where Ne might be >500 (because Ne/Nc is >0.25).
To address this, we:
- Explore the NcPoint data converting it to Ne with a ratio of 0.2 and
0.3 and examine the indicator value
- Explore the differences in the indicator value of the taxa for which
we have a different Ne/Nc ratio.
Ne 500 indicator using 0.2 and 03 on all taxa with NcPoint data
First we get the indicator 1 data again from scratch (because the
object ind1_data already has the transformed Ne etc)
# ind1 data form kobo
data_for_alternative_ratio<-get_indicator1_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
# bind data form templates
data_for_alternative_ratio<-rbind(data_for_alternative_ratio, ind1_data_from_templates)
Keep only populations for which we hav NcPoint data:
# filter rows with missing data for NcPoint
data_for_alternative_ratio<-data_for_alternative_ratio %>%
filter(!is.na(NcPoint))
# show data to check
data_for_alternative_ratio %>% select(taxon, Ne, NcRange, NcPoint)
How many pops and assessments this subset of data has?
# How many pops?
nrow(data_for_alternative_ratio)
## [1] 1303
# how many assesments?
length(unique(data_for_alternative_ratio$X_uuid))
## [1] 197
# how many taxa?
length(unique(data_for_alternative_ratio$taxon))
## [1] 186
# how many assessments by taxonomic groups
data_for_alternative_ratio %>% group_by(X_uuid, taxonomic_group) %>%
summarise(n=n()) %>% group_by(taxonomic_group) %>%
summarise(n=n())
## `summarise()` has grouped output by 'X_uuid'. You can override using the
## `.groups` argument.
# how many assessments by country
data_for_alternative_ratio %>% group_by(X_uuid, country_assessment) %>%
summarise(n=n()) %>% group_by(country_assessment) %>%
summarise(n=n())
## `summarise()` has grouped output by 'X_uuid'. You can override using the
## `.groups` argument.
Now we transform NcPoint data to Ne using 0.1, 0.2 and 0.3 ratios
data_for_alternative_ratio <- data_for_alternative_ratio %>%
mutate(
Ne_0.1=NcPoint*0.1,
Ne_0.2=NcPoint*0.2,
Ne_0.3=NcPoint*0.3)
# show data to check
data_for_alternative_ratio %>% select(NcPoint, Ne_0.1, Ne_0.2, Ne_0.3)
Estimate the Ne 500 indciator for each of the 3 Ne values
ind1_alt_ratios<-data_for_alternative_ratio %>%
group_by(X_uuid, taxonomic_group, taxon) %>%
summarise(n_pops=n(),
n_pops_Ne_data=sum(!is.na(NcPoint)),
n_more_500_Ne_0.1=sum(Ne_0.1>500, na.rm=TRUE),
n_more_500_Ne_0.2=sum(Ne_0.2>500, na.rm=TRUE),
n_more_500_Ne_0.3=sum(Ne_0.3>500, na.rm=TRUE),
ind1_Ne_0.1=n_more_500_Ne_0.1/n_pops_Ne_data,
ind1_Ne_0.2=n_more_500_Ne_0.2/n_pops_Ne_data,
ind1_Ne_0.3=n_more_500_Ne_0.3/n_pops_Ne_data) %>%
ungroup()
## `summarise()` has grouped output by 'X_uuid', 'taxonomic_group'. You can
## override using the `.groups` argument.
ind1_alt_ratios %>% select(taxonomic_group, taxon, ind1_Ne_0.1, ind1_Ne_0.2, ind1_Ne_0.3)
Summary for each ratio:
summary(ind1_alt_ratios$ind1_Ne_0.1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.279 0.600 1.000
summary(ind1_alt_ratios$ind1_Ne_0.2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.3392 0.8889 1.0000
summary(ind1_alt_ratios$ind1_Ne_0.3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.1250 0.3957 1.0000 1.0000
Same data in tidy format for plots and other data summaries:
ind1_alt_ratios_long<-pivot_longer(ind1_alt_ratios,
cols = ind1_Ne_0.1:ind1_Ne_0.3,
names_to="NeNc_ratio",
values_to="ind1_value")
ind1_alt_ratios_long %>% select(taxonomic_group, taxon, NeNc_ratio, ind1_value)
Histogram faceting:
#nicer labels
ind1_alt_ratios_long$NeNc_ratio<-as.factor(ind1_alt_ratios_long$NeNc_ratio)
levels(ind1_alt_ratios_long$NeNc_ratio)<-c("0.1", "0.2", "0.3")
hist_ratios <- ind1_alt_ratios_long %>% ggplot(aes(x=ind1_value, fill=NeNc_ratio)) +
geom_histogram(position = 'identity') +
facet_wrap("NeNc_ratio") +
theme_light() + xlab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
theme(panel.border = element_blank(), legend.position="none")
hist_ratios
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Boxplots:
boxplot_ratios<-ind1_alt_ratios_long %>% ggplot(aes(x=NeNc_ratio, y=ind1_value, fill=NeNc_ratio)) +
geom_boxplot() +
geom_jitter(size=.5, width = 0.1) +
ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
xlab("Ne/Nc ratio") +
theme_light() + theme(legend.position="none")
boxplot_ratios
Table for the taxa for which we have a different Ne/Nc ratio.
Select taxa for which we have an alternative known NeNc ratio:
sp_known_ratios<-metadata %>% filter(!is.na(ratio_species_related)) %>% select(taxon, common_name, taxonomic_group, country_assessment, ratio_species_related)
select(sp_known_ratios, taxon, ratio_species_related, country_assessment)
Remove the elephant, because Jess confirmed <0.1 is 0.1, so no
change:
sp_known_ratios<-filter(sp_known_ratios, taxon != "Loxodonta africana")
Create a new variable with only numeric values for the ratio. For
this, when ranges were provided we take the average.
sp_known_ratios <-sp_known_ratios %>% mutate(
known_ratio=case_when(
# If ratio_species_related is a single value (with a decimal point), keep it as it is
grepl("^\\d+\\.?\\d*$", ratio_species_related) ~ as.numeric(ratio_species_related),
# manually change data that includes ranges or other formats. One by one:
ratio_species_related == "0.26 (0.1-0.3)" ~ 0.26,
ratio_species_related == "1:5" ~ 0.2,
ratio_species_related == "0.09-0.604" ~ mean(c(0.09,0.604)),
ratio_species_related == "0.36 (0.275-0.424)" ~ 0.36,
ratio_species_related == "0.1275-0.237" ~ mean(c(0.1275,0.237)),
ratio_species_related == "0.16-0.78" ~ mean(c(0.16,0.78))
))
## Warning in eval_tidy(pair$rhs, env = default_env): NAs introduced by coercion
select(sp_known_ratios, ratio_species_related, known_ratio)
Add English names:
# keep only english names
sp_known_ratios<-sp_known_ratios %>%
mutate(english_name = str_extract_all(common_name, "(?<=;|^)\\s*([^;(]+)\\s*\\(EN\\)") %>%
sapply(function(x) ifelse(length(x) > 0, x, NA_character_)))
Show nice table
x<-select(sp_known_ratios, taxon, taxonomic_group, english_name, country_assessment, ratio_species_related, known_ratio)
kable(x)
Alces alces |
mammal |
Moose (EN) |
Sweden |
0.28 |
0.28000 |
Syncerus caffer caffer |
mammal |
Cape buffalo (EN) |
South Africa |
0.26 (0.1-0.3) |
0.26000 |
Tetrao urogallus |
bird |
Western capercaillie (EN) |
France |
0.15 |
0.15000 |
Canis lupus |
mammal |
wolf (EN) |
Sweden |
0.25 |
0.25000 |
Poicephalus robustus |
bird |
NA |
South Africa |
0.262 |
0.26200 |
Gulo gulo |
mammal |
Wolverine (EN) |
Sweden |
0.36 |
0.36000 |
Mustela lutreola |
mammal |
European Mink (EN) |
France |
0.258 |
0.25800 |
Ursus arctos |
mammal |
Brown bear (EN) |
Sweden |
0.21 |
0.21000 |
Centrocercus minimus |
bird |
NA |
USA |
1:5 |
0.20000 |
Pelophylax esculentus |
amphibian |
Edible frog (EN) |
Sweden |
0.082 |
0.08200 |
Ursus arctos arctos |
mammal |
Brown bear (EN) |
France |
0.09-0.604 |
0.34700 |
Lynx lynx carpathicus |
mammal |
Eurasian lynx (EN) |
France |
0.36 (0.275-0.424) |
0.36000 |
Triturus cristatus |
amphibian |
Great crested newt (EN) |
Sweden |
0.1275-0.237 |
0.18225 |
Puffinus puffinus |
bird |
Manx shearwater (EN) |
France |
0.7 |
0.70000 |
Falco naumanni |
bird |
Lesser kestrel (EN) |
France |
0.39 |
0.39000 |
Lepanthes eltoroensis |
angiosperm |
NA |
USA |
0.140 |
0.14000 |
Castor fiber |
mammal |
Eurasian Beaver (EN) |
Belgium |
0.25 |
0.25000 |
Lutra lutra |
mammal |
European otter (EN) |
Belgium |
0.25 |
0.25000 |
Cervus elaphus elaphus |
mammal |
Red deer (EN) |
Sweden |
0.23 |
0.23000 |
Thamnophis sirtalis tetrataenia |
reptile |
NA |
USA |
0.16-0.78 |
0.47000 |
Thamnophis sirtalis tetrataenia |
reptile |
NA |
USA |
0.16-0.78 |
0.47000 |
Quick summary of the new ranges:
summary(sp_known_ratios$known_ratio)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0820 0.2100 0.2580 0.2905 0.3600 0.7000
nrow(sp_known_ratios)
## [1] 21
table(sp_known_ratios$taxonomic_group)
##
## amphibian angiosperm bird mammal reptile
## 2 1 5 11 2
Of the taxa for which we have a known Ne/Nc ratio, which have NcPoint
data? (ie are present in data_for_alternative_ratio
). Keep
only those.
#list taxa with known ratios
unique(sp_known_ratios$taxon)
## [1] "Alces alces" "Syncerus caffer caffer"
## [3] "Tetrao urogallus" "Canis lupus"
## [5] "Poicephalus robustus" "Gulo gulo"
## [7] "Mustela lutreola" "Ursus arctos"
## [9] "Centrocercus minimus" "Pelophylax esculentus"
## [11] "Ursus arctos arctos" "Lynx lynx carpathicus"
## [13] "Triturus cristatus" "Puffinus puffinus"
## [15] "Falco naumanni" "Lepanthes eltoroensis"
## [17] "Castor fiber" "Lutra lutra"
## [19] "Cervus elaphus elaphus" "Thamnophis sirtalis tetrataenia"
# are they in data_for_alternative_ratio?
unique(sp_known_ratios$taxon) %in% unique(data_for_alternative_ratio$taxon)
## [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE
## [13] FALSE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
Keep only the taxa with known alternative ratios and NcPoint
data:
sp_known_ratios_NcPoint<-sp_known_ratios[sp_known_ratios$taxon %in% unique(data_for_alternative_ratio$taxon), ]
x<-select(sp_known_ratios_NcPoint, taxon, taxonomic_group, english_name, country_assessment, ratio_species_related, known_ratio)
kable(x)
2 |
Syncerus caffer caffer |
mammal |
Cape buffalo (EN) |
South Africa |
0.26 (0.1-0.3) |
0.260 |
4 |
Canis lupus |
mammal |
wolf (EN) |
Sweden |
0.25 |
0.250 |
6 |
Gulo gulo |
mammal |
Wolverine (EN) |
Sweden |
0.36 |
0.360 |
9 |
Centrocercus minimus |
bird |
NA |
USA |
1:5 |
0.200 |
10 |
Pelophylax esculentus |
amphibian |
Edible frog (EN) |
Sweden |
0.082 |
0.082 |
11 |
Ursus arctos arctos |
mammal |
Brown bear (EN) |
France |
0.09-0.604 |
0.347 |
12 |
Lynx lynx carpathicus |
mammal |
Eurasian lynx (EN) |
France |
0.36 (0.275-0.424) |
0.360 |
15 |
Falco naumanni |
bird |
Lesser kestrel (EN) |
France |
0.39 |
0.390 |
16 |
Lepanthes eltoroensis |
angiosperm |
NA |
USA |
0.140 |
0.140 |
17 |
Castor fiber |
mammal |
Eurasian Beaver (EN) |
Belgium |
0.25 |
0.250 |
19 |
Cervus elaphus elaphus |
mammal |
Red deer (EN) |
Sweden |
0.23 |
0.230 |
20 |
Thamnophis sirtalis tetrataenia |
reptile |
NA |
USA |
0.16-0.78 |
0.470 |
21 |
Thamnophis sirtalis tetrataenia |
reptile |
NA |
USA |
0.16-0.78 |
0.470 |
Use NcPoint data from data_for_alternative_ratio witht he known_ratio
for each species to transform the NcPoint data to Ne:
# to store loop output
data_for_table_altratio<-data.frame()
## loop through taxa with Ne kwnon ratio and NcPoint data
for(i in 1:length(unique(sp_known_ratios_NcPoint$taxon))){
# get desired taxon
desired_taxon <- unique(sp_known_ratios_NcPoint$taxon)[i]
# get known ratio for desired taxon
known_ratio <- sp_known_ratios_NcPoint[sp_known_ratios_NcPoint$taxon== desired_taxon, "known_ratio"]
# for each pop of desired taxon, estimate Ne from the known ratio
x<-filter(data_for_alternative_ratio, taxon == desired_taxon) %>%
mutate(Ne_known_ratio = NcPoint*known_ratio,
known_ratio = known_ratio[1])
data_for_table_altratio<-rbind(data_for_table_altratio, x)
}
# check
data_for_table_altratio %>% select(taxon, NcPoint, Ne_0.1, Ne_0.2, Ne_0.3, known_ratio, Ne_known_ratio)
Estimate the indicator values for the 4 Ne options (0.1, 0.2, 0.3,
known):
ind1_tablesp_ratios<-data_for_table_altratio %>%
group_by(X_uuid, taxonomic_group, taxon, known_ratio) %>%
summarise(n_pops=n(),
n_pops_Ne_data=sum(!is.na(NcPoint)),
n_more_500_Ne_0.1=sum(Ne_0.1>500, na.rm=TRUE),
n_more_500_Ne_0.2=sum(Ne_0.2>500, na.rm=TRUE),
n_more_500_Ne_0.3=sum(Ne_0.3>500, na.rm=TRUE),
n_more_500_Ne_known_ratio=sum(Ne_known_ratio>500, na.rm=TRUE),
ind1_Ne_0.1=n_more_500_Ne_0.1/n_pops_Ne_data,
ind1_Ne_0.2=n_more_500_Ne_0.2/n_pops_Ne_data,
ind1_Ne_0.3=n_more_500_Ne_0.3/n_pops_Ne_data,
ind1_Ne_known_ratio=n_more_500_Ne_known_ratio/n_pops_Ne_data) %>%
ungroup()
## `summarise()` has grouped output by 'X_uuid', 'taxonomic_group', 'taxon'. You
## can override using the `.groups` argument.
kable(select(ind1_tablesp_ratios, -starts_with("n_more"),
-X_uuid), digits = 2)
mammal |
Castor fiber |
0.25 |
1 |
1 |
1.00 |
1.00 |
1.00 |
1.00 |
reptile |
Thamnophis sirtalis tetrataenia |
0.47 |
3 |
3 |
0.00 |
0.00 |
0.00 |
0.00 |
reptile |
Thamnophis sirtalis tetrataenia |
0.47 |
3 |
3 |
0.00 |
0.00 |
0.00 |
0.00 |
mammal |
Canis lupus |
0.25 |
1 |
1 |
0.00 |
0.00 |
0.00 |
0.00 |
mammal |
Gulo gulo |
0.36 |
1 |
1 |
0.00 |
0.00 |
0.00 |
0.00 |
mammal |
Cervus elaphus elaphus |
0.23 |
1 |
1 |
1.00 |
1.00 |
1.00 |
1.00 |
mammal |
Syncerus caffer caffer |
0.26 |
3 |
3 |
0.33 |
0.67 |
0.67 |
0.67 |
bird |
Centrocercus minimus |
0.20 |
7 |
7 |
0.00 |
0.00 |
0.14 |
0.00 |
mammal |
Lynx lynx carpathicus |
0.36 |
1 |
1 |
0.00 |
0.00 |
0.00 |
0.00 |
amphibian |
Pelophylax esculentus |
0.08 |
1 |
1 |
1.00 |
1.00 |
1.00 |
1.00 |
angiosperm |
Lepanthes eltoroensis |
0.14 |
1 |
1 |
0.00 |
1.00 |
1.00 |
0.00 |
bird |
Falco naumanni |
0.39 |
3 |
3 |
0.00 |
0.00 |
0.00 |
0.00 |
mammal |
Ursus arctos arctos |
0.35 |
1 |
1 |
0.00 |
0.00 |
0.00 |
0.00 |
Long format for plots:
ind1_tablesp_long<-pivot_longer(ind1_tablesp_ratios,
cols = c(ind1_Ne_0.1:ind1_Ne_0.3,ind1_Ne_known_ratio),
names_to="NeNc_ratio",
values_to="ind1_value")
Plot
ind1_tablesp_long %>%
# Keep rows with different values in indicator1 within each taxon group
group_by(taxon) %>%
filter(n_distinct(indicator1) > 1) %>%
# plot
ggplot(aes(x=taxon, y=ind1_value)) +
geom_line(colour="darkgrey") +
geom_point(aes(color=NeNc_ratio, shape=NeNc_ratio), alpha=0.6, size=2) +
xlab("") + ylab("Proportion of populations within species with Ne>500 \n(Ne 500 indicator)") +
ylim(0, 1)+
coord_flip() +
theme_light() +
theme(axis.text.y = element_text(face = "italic"), panel.border = element_blank(), legend.position="right", text= element_text(size=13))
Session Info for reproducibility purposes:
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] glmmTMB_1.1.7 knitr_1.39 lme4_1.1-31 Matrix_1.5-3
## [5] cowplot_1.1.1 viridis_0.6.3 viridisLite_0.4.0 alluvial_0.1-2
## [9] ggnewscale_0.4.9 ggsankey_0.0.99999 ggplot2_3.4.1 stringr_1.4.0
## [13] utile.tools_0.2.7 readr_2.1.2 dplyr_1.0.9 tidyr_1.2.0
##
## loaded via a namespace (and not attached):
## [1] TMB_1.9.6 tidyselect_1.1.2 xfun_0.31
## [4] bslib_0.3.1 purrr_0.3.4 splines_4.2.1
## [7] lattice_0.20-45 colorspace_2.0-3 vctrs_0.5.2
## [10] generics_0.1.3 htmltools_0.5.5 yaml_2.3.5
## [13] utf8_1.2.2 rlang_1.0.6 nloptr_2.0.3
## [16] jquerylib_0.1.4 pillar_1.7.0 glue_1.6.2
## [19] withr_2.5.0 DBI_1.1.3 lifecycle_1.0.3
## [22] munsell_0.5.0 gtable_0.3.0 evaluate_0.15
## [25] labeling_0.4.2 tzdb_0.3.0 fastmap_1.1.0
## [28] fansi_1.0.3 highr_0.9 Rcpp_1.0.10
## [31] scales_1.2.0 jsonlite_1.8.0 farver_2.1.1
## [34] gridExtra_2.3 hms_1.1.1 digest_0.6.29
## [37] stringi_1.7.6 numDeriv_2016.8-1.1 grid_4.2.1
## [40] cli_3.6.0 tools_4.2.1 magrittr_2.0.3
## [43] sass_0.4.1 tibble_3.1.7 crayon_1.5.1
## [46] pkgconfig_2.0.3 MASS_7.3-57 ellipsis_0.3.2
## [49] minqa_1.2.5 assertthat_0.2.1 rmarkdown_2.14
## [52] rstudioapi_0.13 boot_1.3-28 R6_2.5.1
## [55] nlme_3.1-157 compiler_4.2.1