final modifications

024d45cf · Ahmed CHOUKRI · db934354 · 024d45cf · 024d45cf · 024d45cf
--- a/R/data_import_germination.R
+++ b/R/data_import_germination.R
@@ -10,11 +10,5 @@

 import_and_prepare_data <- function(file_path) {
  # Import data from Excel
-  data <- read_excel(file_path)

-  # Ensure WPI is a factor
-  data <- data %>%
-    mutate(WPI = as.factor(WPI))
-
-  return(data)
 }
--- a/R/distribution analysis 2.R
+++ b/R/distribution analysis 2.R
+#' Fit and Evaluate Distributions for Relative Germination Data
+#'
+#' This function adjusts the relative germination values if necessary, fits multiple distributions,
+#' evaluates goodness-of-fit statistics, and plots each fitted distribution for visual inspection.
+#'
+#' @param data A data frame containing a column `attachement` with numeric values.
+#'
+#' @return A list containing fitted distribution objects and goodness-of-fit statistics.
+#' @export
+#'
+#' @examples
+#' # Example usage:
+#' data <- read_excel("C:/Users/Choukri-A/Desktop/HPLC/AMAP/test.xlsx")
+#' data$WPI <- as.factor(data$WPI)
+#' fit_results <- fit_and_evaluate_distributions(data)
+
+
+fit_and_evaluate_distributions_attach <- function(data) {
+  data <- data %>%
+    mutate(WPI = as.factor(WPI))
+
+  # Adjust values for Beta distribution if needed
+  data$attachement <- ifelse(data$attachement == 0, 0.001, data$attachement)
+  data$attachement <- ifelse(data$attachement == 1, 0.999, data$attachement)
+
+  # Fit various distributions
+  fits <- list(
+    normal = fitdist(data$attachement, "norm"),
+    exponential = fitdist(data$attachement, "exp"),
+    gamma = fitdist(data$attachement[data$attachement > 0], "gamma"),
+    log_normal = fitdist(data$attachement[data$attachement > 0], "lnorm"),
+    weibull = fitdist(data$attachement[data$attachement > 0], "weibull"),
+    cauchy = fitdist(data$attachement, "cauchy")
+  )
+
+  # Fit Beta distribution if all values are in (0, 1)
+  if (all(data$attachement > 0 & data$attachement < 1)) {
+    fits$beta <- fitdist(data$attachement, "beta")
+  } else {
+    warning("Skipping Beta distribution as data is not within (0, 1).")
+  }
+
+  # Print summaries of each distribution
+  summaries <- lapply(fits, summary)
+  print(summaries)
+
+  # Plot all fits using ggplot2
+  plot_list <- list()
+  for (fit_name in names(fits)) {
+    fit <- fits[[fit_name]]
+    denscomp_plot <- denscomp(list(fit), plotstyle = "ggplot") +
+      ggtitle(paste(fit_name, "fit")) +
+      theme_minimal()
+    plot_list[[fit_name]] <- denscomp_plot
+  }
+
+  # Arrange plots in a grid
+  grid_plot <- ggarrange(plotlist = plot_list, ncol = 2, nrow = 3)
+  print(grid_plot)
+
+  # Goodness-of-fit statistics for comparison
+  gof <- gofstat(fits)
+  print(gof)
+
+  return(list(fits = fits, summaries = summaries, gof = gof, plots = plot_list))
+}
--- a/R/distribution analysis.R
+++ b/R/distribution analysis.R
@@ -13,11 +13,13 @@
 #' data <- read_excel("C:/Users/Choukri-A/Desktop/HPLC/AMAP/test.xlsx")
 #' data$WPI <- as.factor(data$WPI)
 #' fit_results <- fit_and_evaluate_distributions(data)
-library(fitdistrplus)
-library(ggplot2)
+

 fit_and_evaluate_distributions <- function(data) {
-  # Adjust values for Beta distribution if needed
+  data <- data %>%
+    mutate(WPI = as.factor(WPI))
+
+    # Adjust values for Beta distribution if needed
  data$relative.germination <- ifelse(data$relative.germination == 0, 0.001, data$relative.germination)
  data$relative.germination <- ifelse(data$relative.germination == 1, 0.999, data$relative.germination)


--- a/R/load_dependencies.R
+++ b/R/load_dependencies.R
-# Load required packages
-if(!require("fitdistrplus")) install.packages("fitdistrplus", dependencies = TRUE)
-if(!require("readxl")) install.packages("readxl", dependencies = TRUE)
-library(fitdistrplus)
-library(readxl)
-library(dplyr)
+load_dependencies <- function() {
+  packages <- c("fitdistrplus", "readxl", "dplyr", "stats", "emmeans", "ggplot2", "ggpubr")
+
+  # Install missing packages
+  to_install <- packages[!packages %in% installed.packages()[, "Package"]]
+  if (length(to_install) > 0) {
+    install.packages(to_install, dependencies = TRUE)
+  }
+
+  # Load all packages
+  lapply(packages, library, character.only = TRUE)
+}
+
--- a/R/plot1.R
+++ b/R/plot1.R
@@ -43,7 +43,7 @@ plot_germination <- function(data, model_summary, y_scale = c(0, 1.2),
    stringsAsFactors = FALSE
  )

-  # Print the significance annotations
+  # Print the significance annotations for debugging
  print(sig_annotations)

  # Create the plot
@@ -65,7 +65,7 @@ plot_germination <- function(data, model_summary, y_scale = c(0, 1.2),
    ) +
    theme_minimal() +
    theme(
-      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 14),
+      axis.text.x = element_text(angle = , hjust = 1, vjust = 1, size = 14),
      axis.title = element_text(face = "bold", size = 14),
      axis.text.y = element_text(size = 11),
      axis.title.y = element_text(face = "bold", size = 14),
@@ -86,19 +86,24 @@ plot_germination <- function(data, model_summary, y_scale = c(0, 1.2),
  for (i in 1:nrow(sig_annotations)) {
    if (!is.na(sig_annotations$significance[i]) && sig_annotations$significance[i] != "") {
      coefficient <- sig_annotations$coefficient[i]
-      if (grepl("WPI", coefficient)) {
-        wpi <- as.numeric(gsub("WPI", "", coefficient))
-        genotype <- "Intercept"
-      } else {
-        genotype <- gsub("Genotype", "", coefficient)
-        wpi <- 1
-      }
+
+      # Extract WPI and Genotype using regex
+      wpi_match <- regmatches(coefficient, regexec("WPI(\\d)", coefficient))
+      genotype_match <- regmatches(coefficient, regexec("Genotype(\\w+)", coefficient))
+
+      wpi <- as.numeric(unlist(wpi_match)[2])
+      genotype <- if (length(genotype_match[[1]]) > 1) unlist(genotype_match[[1]])[2] else "Intercept"
+
+      # Check if WPI and Genotype exist in summary_data
      mean_value <- summary_data %>%
        filter(WPI == wpi & Genotype == genotype) %>%
        pull(mean_germination)
+
      if (length(mean_value) > 0) {
        p <- p + annotate("text", x = wpi, y = mean_value + 0.05,
                          label = sig_annotations$significance[i], size = 6, color = "black")
+      } else {
+        message("Warning: Mean value not found for WPI:", wpi, " and Genotype:", genotype)
      }
    }
  }

--- a/R/plot2.R
+++ b/R/plot2.R
+#' Plot Germination Ratio with Mean and Standard Deviation
+#'
+#' This function reads in data, calculates summary statistics (mean and standard deviation),
+#' and plots the germination ratio for each Genotype across Weeks Post-Inoculation (WPI).
+#'
+#' @param data A data frame containing columns `Genotype`, `WPI`, and `relative.germination`.
+#' @param y_scale A numeric vector specifying the y-axis limits, default is `c(0, 1.2)`.
+#' @param custom_colors A character vector of colors to use for each Genotype.
+#' @param dodge_width A numeric value for dodging width, default is `0.3`.
+#'
+#' @return A ggplot object displaying germination ratio means with standard deviation error bars.
+#' @export
+#'
+#' @examples
+#' # Example usage:
+#' data <- read_excel("C:/Users/Choukri-A/Desktop/HPLC/AMAP/test.xlsx")
+#' data$WPI <- as.factor(data$WPI)
+#' plot_germination(data)
+plot_attach <- function(data, model_summary, y_scale = c(0, 210),
+                             custom_colors = c("#FF0000", "#00FF00", "#0000FF", "#800080", "#FF00FF", "#00FFFF"),
+                             dodge_width = 0.3) {
+  # Summarize data by calculating mean and standard deviation
+  summary_data <- data %>%
+    group_by(Genotype, WPI) %>%
+    summarise(
+      mean_germination = mean(attachement, na.rm = TRUE),
+      sd_germination = sd(attachement, na.rm = TRUE)
+    )
+
+  # Extract p-values from GLM results
+  p_values <- model_summary$coefficients[, "Pr(>|t|)"]
+  coefficients <- rownames(model_summary$coefficients)
+
+  # Determine significance levels
+  significance_levels <- ifelse(p_values < 0.0001, "***",
+                                ifelse(p_values < 0.001, "**",
+                                       ifelse(p_values < 0.05, "*", "")))
+
+  # Create a data frame for significance annotations
+  sig_annotations <- data.frame(
+    coefficient = coefficients,
+    significance = significance_levels,
+    stringsAsFactors = FALSE
+  )
+
+  # Print the significance annotations for debugging
+  print(sig_annotations)
+
+  # Create the plot
+  p <- ggplot(summary_data, aes(x = factor(WPI),
+                                y = mean_germination,
+                                group = Genotype,
+                                color = factor(Genotype, levels = unique(Genotype)))) +
+    geom_line(position = position_dodge(width = dodge_width), size = 1.2) +
+    geom_point(position = position_dodge(width = dodge_width), size = 3) +
+    geom_errorbar(aes(ymin = mean_germination - sd_germination, ymax = mean_germination + sd_germination),
+                  position = position_dodge(width = dodge_width),
+                  width = 0.2,
+                  size = 1,
+                  color = "black") +
+    labs(
+      x = "Weeks Post-Inoculation (WPI)",
+      y = "Attachement",
+      color = "Genotype"
+    ) +
+    theme_minimal() +
+    theme(
+      axis.text.x = element_text(angle = , hjust = 1, vjust = 1, size = 14),
+      axis.title = element_text(face = "bold", size = 14),
+      axis.text.y = element_text(size = 11),
+      axis.title.y = element_text(face = "bold", size = 14),
+      plot.title = element_text(face = "bold", size = 16),
+      legend.title = element_text(size = 16),
+      legend.text = element_text(face = "italic", size = 16),
+      panel.grid.major = element_line(color = "gray", linetype = "dashed"),
+      panel.grid.minor = element_blank(),
+      panel.border = element_blank(),
+      panel.background = element_blank(),
+      strip.text = element_text(size = 12, face = "bold")
+    ) +
+    scale_color_manual(values = custom_colors) +
+    coord_cartesian(ylim = y_scale) +
+    scale_x_discrete(limits = c("1", "2", "3", "4"))
+
+  # Add significance asterisks at the mean points
+  for (i in 1:nrow(sig_annotations)) {
+    if (!is.na(sig_annotations$significance[i]) && sig_annotations$significance[i] != "") {
+      coefficient <- sig_annotations$coefficient[i]
+
+      # Extract WPI and Genotype using regex
+      wpi_match <- regmatches(coefficient, regexec("WPI(\\d)", coefficient))
+      genotype_match <- regmatches(coefficient, regexec("Genotype(\\w+)", coefficient))
+
+      wpi <- as.numeric(unlist(wpi_match)[2])
+      genotype <- if (length(genotype_match[[1]]) > 1) unlist(genotype_match[[1]])[2] else "Intercept"
+
+      # Check if WPI and Genotype exist in summary_data
+      mean_value <- summary_data %>%
+        filter(WPI == wpi & Genotype == genotype) %>%
+        pull(mean_germination)
+
+      if (length(mean_value) > 0) {
+        p <- p + annotate("text", x = wpi, y = mean_value + 0.05,
+                          label = sig_annotations$significance[i], size = 6, color = "black")
+      } else {
+        message("Warning: Mean value not found for WPI:", wpi, " and Genotype:", genotype)
+      }
+    }
+  }
+
+  # Display plot
+  print(p)
+
+  # Return the ggplot object
+  return(p)
+}
--- a/R/statistical_analysis 2.R
+++ b/R/statistical_analysis 2.R
+#' Fit GLM and Perform Post Hoc Analysis for Relative Germination Data
+#'
+#' This function fits a generalized linear model (GLM) for `attachement` based on `WPI` and `Genotype`,
+#' calculates estimated marginal means, and performs a Tukey post hoc test for pairwise comparisons.
+#'
+#' @param data A data frame containing columns `attachement`, `WPI`, and `Genotype`.
+#' @param family The family for the GLM; default is Gamma with a log link.
+#'
+#' @return A list containing the model summary, estimated marginal means, and Tukey post hoc test results.
+#' @export
+#'
+#' @examples
+#' # Example usage:
+#' data$WPI <- as.factor(data$WPI)
+#' glm_results <- fit_glm_and_posthoc(data)
+fit_glm_and_posthoc_attach <- function(data, family = Gamma(link = "log")) {
+  # Load necessary libraries
+  data <- data %>%
+    mutate(WPI = as.factor(WPI))
+
+  # Adjust values for Beta distribution if needed
+  data$attachement <- ifelse(data$attachement == 0, 0.001, data$attachement)
+
+  # Fit the generalized linear model
+  model <- glm(attachement ~ WPI * Genotype, data = data, family = family)
+
+  # Summarize the model
+  model_summary <<- summary(model)
+  print(model_summary)
+
+  # Check model diagnostics
+  plot(model)
+
+  # Calculate estimated marginal means for the interaction of WPI and Genotype
+  emm <- emmeans(model, ~ WPI * Genotype)
+  print(emm)
+
+  # Perform Tukey's post hoc test for the interaction of WPI and Genotype
+  tukey_result <- pairs(emm, adjust = "tukey")
+  print(tukey_result)
+
+  # Return the model, marginal means, and Tukey results
+  return(list(
+    model_summary = model_summary,
+    emm = emm,
+    tukey_result = tukey_result
+  ))
+}
--- a/R/statistical_analysis.R
+++ b/R/statistical_analysis.R
@@ -11,19 +11,21 @@
 #'
 #' @examples
 #' # Example usage:
-#' data <- read_excel("C:/Users/Choukri-A/Desktop/HPLC/AMAP/test.xlsx")
 #' data$WPI <- as.factor(data$WPI)
 #' glm_results <- fit_glm_and_posthoc(data)
 fit_glm_and_posthoc <- function(data, family = Gamma(link = "log")) {
  # Load necessary libraries
-  library(stats)
-  library(emmeans)
+  data <- data %>%
+    mutate(WPI = as.factor(WPI))

+  # Adjust values for Beta distribution if needed
+  data$relative.germination <- ifelse(data$relative.germination == 0, 0.001, data$relative.germination)
+  data$relative.germination <- ifelse(data$relative.germination == 1, 0.999, data$relative.germination)
  # Fit the generalized linear model
  model <- glm(relative.germination ~ WPI * Genotype, data = data, family = family)

  # Summarize the model
-  model_summary <- summary(model)
+  model_summary <<- summary(model)
  print(model_summary)

  # Check model diagnostics

--- a/README.md
+++ b/README.md
-# Minirhizotrons
+# AMAP : Designed for Minirhizotrons' downstream analysis 

+The aim of this package is to analyze the i. germination ratio parameter according the scored relative germination in each square and ii. the attachment values, these two parameters are analyzed in a comprehensive way including distribution analysis and automatized, with few custom changes, statistical analysis and appropriate plotting format using ggplot2.

+## Input table format for relative germination parameter

-## Main commands
+| WPI | Genotype | Square | Relative Germination |
+|-----|----------|--------|----------------------|
+| 1   | A        | 1      | 0                    |
+| 2   | A        | 1      | 0.4126               |
+| 3   | A        | 1      | 0.223221             |
+| 4   | A        | 1      | 0.262222             |
+| 1   | B        | 1      | 0                    |
+| 2   | B        | 1      | 0.22316              |
+| 3   | B        | 1      | 0.4122               |
+| 4   | B        | 1      | 0.7426               |

+## Input table format for attachement parameter

-```
+| WPI | Genotype | Attachment |
+|-----|----------|------------|
+| 1   | A        | 0          |
+| 2   | A        | 14         |
+| 3   | A        | 40         |
+| 4   | A        | 80         |
+| 1   | B        | 0          |
+| 2   | B        | 0          |
+| 3   | B        | 4          |
+| 4   | B        | 7          |

-library(AMAP)
-prepared_data <- import_and_prepare_data(choose.files())
-fit_results <- fit_and_evaluate_distributions(data)
-glm_results <- fit_glm_and_posthoc(data)
-plot_germination(data,  model_summary)

+## Main commands
+
+```         
+library(AMAP) #to load the library
+load_dependencies() 
+data <- read_excel(choose.files()) #select the data source file

+#For germination scoring :
+fit_results <- fit_and_evaluate_distributions(data) #This will generate a report of the distribution test results , pay close attention to the AIC parameter when evaluating
+fit_glm_and_posthoc (data) #run statitical test using a glm based on the distribution results
+plot_germination(data,  model_summary) #plot the results 
+
+#For attachement scoring
+fit_results <- fit_and_evaluate_distributions_attach (data) #This will generate a report of the distribution test results , pay close attention to the AIC parameter when evaluating
+fit_glm_and_posthoc_attach (data) #run statitical test using a glm based on the distribution results
+plot_attach (data,  model_summary)#plot the results 

 ```
+## For inqueries
+
+ahmed.choukri@univ-nantes.fr{.email} / choukriahmed.uca@yahoo.com{.email} 📬
+
--- a/test.xlsx
+++ b/test.xlsx