Develop new functions based on the mass_dataset class

If you want to develop new functions based on the “mass_dataset” class in your own package, you need to declare the massdataset package as a dependency of your package.

DESCRIPTION

In the DESCRIPTION file, list massdataset under both Imports and Remotes:

Imports:
    massdataset
Remotes: 
    tidymass/massdataset

R code

Please use the “mass_dataset” class as the primary input of your new function. If the function directly processes a single component of “mass_dataset”, just extract that component and then process it.

library(massdataset)

# Load the five example tables that are passed to create_mass_dataset() below.
data(
  list = c(
    "expression_data",
    "sample_info",
    "sample_info_note",
    "variable_info",
    "variable_info_note"
  )
)

# Assemble the tables into a single object and print its summary.
object <- create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info,
  sample_info_note = sample_info_note,
  variable_info_note = variable_info_note
)
object
#> -------------------- 
#> massdataset version: 1.0.12 
#> -------------------- 
#> 1.expression_data:[ 1000 x 8 data.frame]
#> 2.sample_info:[ 8 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> 1 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2022-08-07 19:21:22

For example, we can create a function named get_class_test() to get “class” from “sample_info”.

#' Get the sample classes stored in a mass_dataset object
#'
#' @param object An object with a `sample_info` slot holding a data frame
#'   (for example a mass_dataset object).
#' @return The `class` column of the `sample_info` slot.
get_class_test <- function(object) {
  sample_info <- methods::slot(object, "sample_info")
  sample_info$class
}
get_class_test(object)
#> [1] "Blank"   "Blank"   "QC"      "QC"      "Subject" "Subject" "Subject"
#> [8] "Subject"

If the function is general, you can use activate_mass_dataset() to tell your function which component you want to process.

For example, we can create a function named remove_blank() that removes blank samples from “sample_info” or “expression_data”.

#' Remove blank samples from the activated component of a mass_dataset
#'
#' The component to work on is read from `object@activated`:
#' * `"sample_info"`: rows whose `class` is `"Blank"` are filtered out.
#' * `"expression_data"`: columns whose name contains `"Blank"` are dropped
#'   (the match is done by `dplyr::contains()` with its default settings).
#'
#' After the change the object is passed through `update_mass_dataset()`.
#'
#' @param object A mass_dataset object with an activated component.
#' @return The object returned by `update_mass_dataset()`.
remove_blank <- function(object) {
  if (length(object@activated) == 0) {
    stop("activate_mass_dataset() first.\n")
  }

  # Reject unsupported components up front so that every remaining path
  # returns an updated object.
  activated <- object@activated
  if (!activated %in% c("sample_info", "expression_data")) {
    stop("Only support sample_info or expression_data.\n")
  }

  # Call the dplyr verbs directly so the function does not depend on a pipe
  # operator being attached.
  if (activated == "sample_info") {
    object@sample_info <-
      dplyr::filter(object@sample_info, class != "Blank")
  } else {
    object@expression_data <-
      dplyr::select(object@expression_data, -dplyr::contains("Blank"))
  }

  update_mass_dataset(object)
}
# No component is activated in this call. remove_blank() stops with
# "activate_mass_dataset() first." when object@activated is empty
# (presumably the case for a freshly created object -- TODO confirm).
object %>% 
  remove_blank()
# Activate sample_info first; remove_blank() then takes its sample_info
# branch (assuming activate_mass_dataset() records the choice in
# object@activated).
object %>% 
  activate_mass_dataset(what = "sample_info") %>% 
  remove_blank()
#> -------------------- 
#> massdataset version: 1.0.12 
#> -------------------- 
#> 1.expression_data:[ 1000 x 6 data.frame]
#> 2.sample_info:[ 6 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> 2 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2022-08-07 19:21:22
#> update_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset update_mass_dataset() 2022-08-07 19:21:22
# Activate expression_data instead; remove_blank() then drops the columns
# whose name contains "Blank" (assuming activate_mass_dataset() records the
# choice in object@activated).
object %>% 
  activate_mass_dataset(what = "expression_data") %>% 
  remove_blank()
#> -------------------- 
#> massdataset version: 1.0.12 
#> -------------------- 
#> 1.expression_data:[ 1000 x 6 data.frame]
#> 2.sample_info:[ 6 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> 2 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2022-08-07 19:21:22
#> update_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset update_mass_dataset() 2022-08-07 19:21:22
# variable_info is not handled by remove_blank(), so this call is expected to
# stop with "Only support sample_info or expression_data."
object %>% 
  activate_mass_dataset(what = "variable_info") %>% 
  remove_blank()

Make functions from other packages support mass_dataset

If you want to make functions from other packages support “mass_dataset”, here are two examples:

#' @title head
#' @description Keep only the first rows of the expression data of a
#'   mass_dataset object.
#' @method head mass_dataset
#' @param x A mass_dataset object.
#' @param ... Further arguments passed on to `utils::head()` when it is
#'   applied to the `expression_data` slot (for example `n`).
#' @export
#' @rdname processing-mass_dataset
#' @return mass_dataset class object
head.mass_dataset <- function(x, ...) {
  # Qualify head() so the method does not rely on utils being attached.
  x@expression_data <- utils::head(x@expression_data, ...)
  # Pass the object through update_mass_dataset() after changing the slot.
  update_mass_dataset(x)
}
#' @title apply
#' @description Apply a function over the rows or columns of the expression
#'   data of a mass_dataset object.
#' @method apply mass_dataset
#' @param X A mass_dataset object; its `expression_data` slot is converted
#'   with `as.matrix()` before `FUN` is applied.
#' @param MARGIN Passed on unchanged to `apply()`.
#' @param FUN Passed on unchanged to `apply()`.
#' @param ... Further arguments passed on to `apply()`.
#' @param simplify Passed on to `apply()` as its `simplify` argument.
#' @export
#' @rdname summary-mass_dataset
#' @return result
setMethod(
  f = "apply",
  signature = signature(X = "mass_dataset"),
  definition = function(X, MARGIN, FUN, ..., simplify = TRUE) {
    expression_matrix <- as.matrix(X@expression_data)
    apply(expression_matrix, MARGIN, FUN, ..., simplify = simplify)
  }
)

Session information

sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur ... 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] ggplot2_3.3.6      magrittr_2.0.3     masstools_1.0.2    massdataset_1.0.12
#> 
#> loaded via a namespace (and not attached):
#>   [1] colorspace_2.0-3            rjson_0.2.21               
#>   [3] ellipsis_0.3.2              rprojroot_2.0.3            
#>   [5] circlize_0.4.15             XVector_0.36.0             
#>   [7] GenomicRanges_1.48.0        GlobalOptions_0.1.2        
#>   [9] fs_1.5.2                    clue_0.3-61                
#>  [11] rstudioapi_0.13             mzR_2.30.0                 
#>  [13] affyio_1.66.0               fansi_1.0.3                
#>  [15] codetools_0.2-18            ncdf4_1.19                 
#>  [17] doParallel_1.0.17           cachem_1.0.6               
#>  [19] impute_1.70.0               knitr_1.39                 
#>  [21] jsonlite_1.8.0              cluster_2.1.3              
#>  [23] vsn_3.64.0                  png_0.1-7                  
#>  [25] readr_2.1.2                 BiocManager_1.30.18        
#>  [27] compiler_4.2.1              httr_1.4.3                 
#>  [29] assertthat_0.2.1            Matrix_1.4-1               
#>  [31] fastmap_1.1.0               lazyeval_0.2.2             
#>  [33] limma_3.52.2                cli_3.3.0                  
#>  [35] htmltools_0.5.2             tools_4.2.1                
#>  [37] gtable_0.3.0                glue_1.6.2                 
#>  [39] GenomeInfoDbData_1.2.8      affy_1.74.0                
#>  [41] dplyr_1.0.9                 Rcpp_1.0.8.3               
#>  [43] MALDIquant_1.21             Biobase_2.56.0             
#>  [45] jquerylib_0.1.4             pkgdown_2.0.5              
#>  [47] vctrs_0.4.1                 preprocessCore_1.58.0      
#>  [49] iterators_1.0.14            xfun_0.31                  
#>  [51] stringr_1.4.0               openxlsx_4.2.5             
#>  [53] lifecycle_1.0.1             XML_3.99-0.10              
#>  [55] zlibbioc_1.42.0             MASS_7.3-57                
#>  [57] scales_1.2.0                MSnbase_2.22.0             
#>  [59] hms_1.1.1                   ragg_1.2.2                 
#>  [61] pcaMethods_1.88.0           MatrixGenerics_1.8.1       
#>  [63] ProtGenerics_1.28.0         parallel_4.2.1             
#>  [65] SummarizedExperiment_1.26.1 RColorBrewer_1.1-3         
#>  [67] ComplexHeatmap_2.12.0       yaml_2.3.5                 
#>  [69] memoise_2.0.1               pbapply_1.5-0              
#>  [71] yulab.utils_0.0.5           sass_0.4.1                 
#>  [73] stringi_1.7.6               S4Vectors_0.34.0           
#>  [75] desc_1.4.1                  foreach_1.5.2              
#>  [77] BiocGenerics_0.42.0         zip_2.2.0                  
#>  [79] BiocParallel_1.30.3         shape_1.4.6                
#>  [81] GenomeInfoDb_1.32.2         rlang_1.0.3                
#>  [83] pkgconfig_2.0.3             systemfonts_1.0.4          
#>  [85] matrixStats_0.62.0          bitops_1.0-7               
#>  [87] mzID_1.34.0                 evaluate_0.15              
#>  [89] lattice_0.20-45             purrr_0.3.4                
#>  [91] htmlwidgets_1.5.4           tidyselect_1.1.2           
#>  [93] ggsci_2.9                   plyr_1.8.7                 
#>  [95] R6_2.5.1                    IRanges_2.30.0             
#>  [97] generics_0.1.3              DelayedArray_0.22.0        
#>  [99] DBI_1.1.3                   withr_2.5.0                
#> [101] pillar_1.7.0                MsCoreUtils_1.8.0          
#> [103] RCurl_1.98-1.7              tibble_3.1.7               
#> [105] crayon_1.5.1                utf8_1.2.2                 
#> [107] plotly_4.10.0               tzdb_0.3.0                 
#> [109] rmarkdown_2.14              GetoptLong_1.0.5           
#> [111] grid_4.2.1                  data.table_1.14.2          
#> [113] digest_0.6.29               tidyr_1.2.0                
#> [115] gridGraphics_0.5-1          textshaping_0.3.6          
#> [117] Rdisop_1.56.0               stats4_4.2.1               
#> [119] munsell_0.5.0               viridisLite_0.4.0          
#> [121] ggplotify_0.1.0             bslib_0.3.1