vignettes/based_on_mass_dataset.Rmd
based_on_mass_dataset.Rmd
If you want to develop new functions based on the “mass_dataset” class in
your package, you need to add the massdataset
package as
a dependency of your package.
In the DESCRIPTION
file, list massdataset
under both
Imports
and Remotes:
Imports:
massdataset
Remotes:
tidymass/massdataset
Please use the “mass_dataset” class as the primary input of your new function. If the function directly processes only one component of “mass_dataset”, just extract that component and then process it.
# Attach the package, then load the five demo tables used below.
library(massdataset)
data(
  "expression_data",
  "sample_info",
  "sample_info_note",
  "variable_info",
  "variable_info_note"
)

# Combine the demo tables into a single "mass_dataset" object.
object <- create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info,
  sample_info_note = sample_info_note,
  variable_info_note = variable_info_note
)

# Auto-print the object to show its summary.
object
#> --------------------
#> massdataset version: 1.0.12
#> --------------------
#> 1.expression_data:[ 1000 x 8 data.frame]
#> 2.sample_info:[ 8 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> 1 processings in total
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-08-07 19:21:22
For example, we can create a function named
get_class_test()
to get “class” from “sample_info”.
#' Get the "class" column of the sample information
#'
#' @param object An object with a `sample_info` slot (a "mass_dataset").
#' @return The `class` column of `object@sample_info`.
get_class_test <- function(object) {
  # Pull out the single component this function works on, then read from it.
  sample_info <- object@sample_info
  sample_info$class
}
get_class_test(object)
#> [1] "Blank" "Blank" "QC" "QC" "Subject" "Subject" "Subject"
#> [8] "Subject"
If the function is general, you can use
activate_mass_dataset()
to tell your function which
component you want to process.
For example, we can create a function named
remove_blank()
to remove blank samples from “sample_info” or “expression_data”.
#' Remove blank samples from the activated component of a mass_dataset
#'
#' The component to work on is read from `object@activated`:
#' * `"sample_info"`: rows of `sample_info` whose `class` is "Blank" are
#'   filtered out.
#' * `"expression_data"`: columns of `expression_data` selected by
#'   `dplyr::contains("Blank")` (i.e. matched on column name) are dropped.
#'
#' Afterwards the object is passed through `update_mass_dataset()`.
#'
#' @param object A "mass_dataset" object on which a component has been
#'   activated.
#' @return The object returned by `update_mass_dataset()`.
remove_blank <- function(object) {
  activated <- object@activated

  # Nothing activated yet: the caller must choose a component first.
  if (length(activated) == 0) {
    stop("activate_mass_dataset() first.\n")
  }

  # Validate up front with scalar logic so an unsupported (or non-scalar)
  # value fails here instead of inside an `if` on a non-scalar condition.
  supported <- c("sample_info", "expression_data")
  if (length(activated) != 1 || !(activated %in% supported)) {
    stop("Only support sample_info or expression_data.\n")
  }

  if (activated == "sample_info") {
    # Keep every sample whose class is not "Blank".
    object@sample_info <-
      dplyr::filter(object@sample_info, class != "Blank")
  } else {
    # Drop every column whose name is matched by contains("Blank").
    object@expression_data <-
      dplyr::select(object@expression_data, -dplyr::contains("Blank"))
  }

  # Both branches share the same tail: update the object and return it.
  object <- update_mass_dataset(object)
  object
}
# NOTE(review): no component is activated before this call, so it is
# presumably meant to trigger the "activate_mass_dataset() first." error
# raised by remove_blank() -- confirm.
object %>%
remove_blank()
# Activate "sample_info" first; remove_blank() then filters out the
# sample_info rows whose class is "Blank".
object %>%
activate_mass_dataset(what = "sample_info") %>%
remove_blank()
#> --------------------
#> massdataset version: 1.0.12
#> --------------------
#> 1.expression_data:[ 1000 x 6 data.frame]
#> 2.sample_info:[ 6 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> 2 processings in total
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-08-07 19:21:22
#> update_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset update_mass_dataset() 2022-08-07 19:21:22
# Activate "expression_data" first; remove_blank() then drops the
# expression_data columns selected by dplyr::contains("Blank").
object %>%
activate_mass_dataset(what = "expression_data") %>%
remove_blank()
#> --------------------
#> massdataset version: 1.0.12
#> --------------------
#> 1.expression_data:[ 1000 x 6 data.frame]
#> 2.sample_info:[ 6 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> 2 processings in total
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-08-07 19:21:22
#> update_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset update_mass_dataset() 2022-08-07 19:21:22
# remove_blank() only handles "sample_info" and "expression_data"; with
# "variable_info" activated it should stop with
# "Only support sample_info or expression_data."
# (assumes activate_mass_dataset() stores `what` in the activated slot).
object %>%
activate_mass_dataset(what = "variable_info") %>%
remove_blank()
If you want to make functions from other packages support “mass_dataset”, here are two examples:
#' @title head
#' @description S3 `head()` method for "mass_dataset": applies `head()` to
#'   the `expression_data` slot and then updates the object.
#' @method head mass_dataset
#' @param x A "mass_dataset" object.
#' @param ... Further arguments passed on to `head()` for `x@expression_data`.
#' @export
#' @rdname processing-mass_dataset
#' @return mass_dataset class object
head.mass_dataset <- function(x, ...) {
  # Truncate the expression table only; update_mass_dataset() is then run
  # on the modified object.
  truncated <- head(x@expression_data, ...)
  x@expression_data <- truncated
  x <- update_mass_dataset(x)
  x
}
#' @title apply
#' @description S4 `apply()` method for "mass_dataset": the `expression_data`
#'   slot is converted with `as.matrix()` and handed to `apply()`.
#' @method apply mass_dataset
#' @param X A "mass_dataset" object.
#' @param MARGIN Passed to `apply()` unchanged.
#' @param FUN Passed to `apply()` unchanged.
#' @param ... Further arguments passed to `apply()` (and on to `FUN`).
#' @param simplify Passed to `apply()` unchanged; defaults to `TRUE`.
#' @export
#' @rdname summary-mass_dataset
#' @return result
setMethod(
  f = "apply",
  signature = signature(X = "mass_dataset"),
  definition = function(X, MARGIN, FUN, ..., simplify = TRUE) {
    # Work on a matrix copy of the expression table.
    expression_matrix <- as.matrix(X@expression_data)
    apply(expression_matrix, MARGIN, FUN, ..., simplify = simplify)
  }
)
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur ... 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_3.3.6 magrittr_2.0.3 masstools_1.0.2 massdataset_1.0.12
#>
#> loaded via a namespace (and not attached):
#> [1] colorspace_2.0-3 rjson_0.2.21
#> [3] ellipsis_0.3.2 rprojroot_2.0.3
#> [5] circlize_0.4.15 XVector_0.36.0
#> [7] GenomicRanges_1.48.0 GlobalOptions_0.1.2
#> [9] fs_1.5.2 clue_0.3-61
#> [11] rstudioapi_0.13 mzR_2.30.0
#> [13] affyio_1.66.0 fansi_1.0.3
#> [15] codetools_0.2-18 ncdf4_1.19
#> [17] doParallel_1.0.17 cachem_1.0.6
#> [19] impute_1.70.0 knitr_1.39
#> [21] jsonlite_1.8.0 cluster_2.1.3
#> [23] vsn_3.64.0 png_0.1-7
#> [25] readr_2.1.2 BiocManager_1.30.18
#> [27] compiler_4.2.1 httr_1.4.3
#> [29] assertthat_0.2.1 Matrix_1.4-1
#> [31] fastmap_1.1.0 lazyeval_0.2.2
#> [33] limma_3.52.2 cli_3.3.0
#> [35] htmltools_0.5.2 tools_4.2.1
#> [37] gtable_0.3.0 glue_1.6.2
#> [39] GenomeInfoDbData_1.2.8 affy_1.74.0
#> [41] dplyr_1.0.9 Rcpp_1.0.8.3
#> [43] MALDIquant_1.21 Biobase_2.56.0
#> [45] jquerylib_0.1.4 pkgdown_2.0.5
#> [47] vctrs_0.4.1 preprocessCore_1.58.0
#> [49] iterators_1.0.14 xfun_0.31
#> [51] stringr_1.4.0 openxlsx_4.2.5
#> [53] lifecycle_1.0.1 XML_3.99-0.10
#> [55] zlibbioc_1.42.0 MASS_7.3-57
#> [57] scales_1.2.0 MSnbase_2.22.0
#> [59] hms_1.1.1 ragg_1.2.2
#> [61] pcaMethods_1.88.0 MatrixGenerics_1.8.1
#> [63] ProtGenerics_1.28.0 parallel_4.2.1
#> [65] SummarizedExperiment_1.26.1 RColorBrewer_1.1-3
#> [67] ComplexHeatmap_2.12.0 yaml_2.3.5
#> [69] memoise_2.0.1 pbapply_1.5-0
#> [71] yulab.utils_0.0.5 sass_0.4.1
#> [73] stringi_1.7.6 S4Vectors_0.34.0
#> [75] desc_1.4.1 foreach_1.5.2
#> [77] BiocGenerics_0.42.0 zip_2.2.0
#> [79] BiocParallel_1.30.3 shape_1.4.6
#> [81] GenomeInfoDb_1.32.2 rlang_1.0.3
#> [83] pkgconfig_2.0.3 systemfonts_1.0.4
#> [85] matrixStats_0.62.0 bitops_1.0-7
#> [87] mzID_1.34.0 evaluate_0.15
#> [89] lattice_0.20-45 purrr_0.3.4
#> [91] htmlwidgets_1.5.4 tidyselect_1.1.2
#> [93] ggsci_2.9 plyr_1.8.7
#> [95] R6_2.5.1 IRanges_2.30.0
#> [97] generics_0.1.3 DelayedArray_0.22.0
#> [99] DBI_1.1.3 withr_2.5.0
#> [101] pillar_1.7.0 MsCoreUtils_1.8.0
#> [103] RCurl_1.98-1.7 tibble_3.1.7
#> [105] crayon_1.5.1 utf8_1.2.2
#> [107] plotly_4.10.0 tzdb_0.3.0
#> [109] rmarkdown_2.14 GetoptLong_1.0.5
#> [111] grid_4.2.1 data.table_1.14.2
#> [113] digest_0.6.29 tidyr_1.2.0
#> [115] gridGraphics_0.5-1 textshaping_0.3.6
#> [117] Rdisop_1.56.0 stats4_4.2.1
#> [119] munsell_0.5.0 viridisLite_0.4.0
#> [121] ggplotify_0.1.0 bslib_0.3.1