From d776468b5b7b18ac88eac7b2fa4f6a60fcc3a081 Mon Sep 17 00:00:00 2001 From: Jeff Kimbrel Date: Thu, 3 Oct 2024 11:18:01 -0700 Subject: [PATCH] v0.17.0 spike-in logic started --- DESCRIPTION | 2 +- NAMESPACE | 3 +++ NEWS.md | 4 ++++ R/jgi_feature_df.R | 13 +++++++++++++ R/jgi_sample_df.R | 25 +++++++++++++++++++++++++ R/jgi_source_df.R | 15 +++++++++++++++ R/sysdata.rda | Bin 149 -> 7195 bytes _pkgdown.yml | 6 ++++++ data-raw/example_qsip_data.R | 17 +++++++++++++++++ man/jgi_feature_df.Rd | 11 +++++++++++ man/jgi_sample_df.Rd | 11 +++++++++++ man/jgi_source_df.Rd | 11 +++++++++++ 12 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 R/jgi_feature_df.R create mode 100644 R/jgi_sample_df.R create mode 100644 R/jgi_source_df.R create mode 100644 man/jgi_feature_df.Rd create mode 100644 man/jgi_sample_df.Rd create mode 100644 man/jgi_source_df.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 468af54..1fb608a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: qSIP2 Title: qSIP Analysis -Version: 0.16.15 +Version: 0.17.0 Authors@R: person("Jeff", "Kimbrel", , "kimbrel1@llnl.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7213-9392")) diff --git a/NAMESPACE b/NAMESPACE index df83e28..4a2d0f4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,9 @@ export(infer_source_data) export(is_qsip_data) export(is_qsip_data_list) export(is_qsip_filtered) +export(jgi_feature_df) +export(jgi_sample_df) +export(jgi_source_df) export(multi_qsip_wrapper) export(multi_qsip_wrapper_launcher) export(n_resamples) diff --git a/NEWS.md b/NEWS.md index 262e647..cdfaf9e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# qSIP2 0.17.0 + +* New functions to work with JGI spike-ins + # qSIP2 0.16 * Added new vignettes diff --git a/R/jgi_feature_df.R b/R/jgi_feature_df.R new file mode 100644 index 0000000..905e6d6 --- /dev/null +++ b/R/jgi_feature_df.R @@ -0,0 +1,13 @@ +#' Make a feature dataframe from a JGI coverage file +#' +#' @export + +jgi_feature_df <- function(coverage_file) { + readr::read_csv(coverage_file, show_col_types = F) |> + tidyr::pivot_longer( + cols = c(dplyr::everything(), -Feature), + names_to = "sample_id", + values_to = "COVERAGE" + ) |> + dplyr::rename("feature_id" = "Feature") +} diff --git a/R/jgi_sample_df.R b/R/jgi_sample_df.R new file mode 100644 index 0000000..3c24bab --- /dev/null +++ b/R/jgi_sample_df.R @@ -0,0 +1,25 @@ +#' Make a sample dataframe from JGI proposal file +#' +#' @export + +jgi_sample_df <- function(file_path, skip = 27) { + readr::read_lines(proposal_file, + skip = skip + ) |> + tibble::enframe() |> + dplyr::filter(stringr::str_detect(value, "^\\d+\\.\\d+")) |> + tidyr::separate(value, sep = "\t", into = c("Fraction", "Fraction_eluted_volume (uL)", "Fraction_density (g/mL)", "Eluted_DNA_concentration (ng/uL)", "Run_date", "Library_name", "Fastq_name", "Sequencing_project_ID", "Sequins_added (pg)", "Mix_type", "Raw_reads_count", "Filtered_reads_count")) |> + tidyr::separate(Fraction, sep = " ", into = c("Fraction", "sample_id")) |> + tidyr::separate(Fraction, sep = "\\.", into = c("SOURCE", "Fraction")) |> + dplyr::select(sample_id, + SOURCE, + gradient_pos = Fraction, + gradient_pos_density = `Fraction_density (g/mL)`, + eluted_volume_ul = `Fraction_eluted_volume (uL)`, + eluted_conc_ng_ul = `Eluted_DNA_concentration (ng/uL)`, + sequins_pg = `Sequins_added (pg)`, + MIX = Mix_type + ) |> + dplyr::left_join(sources, by = "SOURCE") |> + dplyr::select(-SOURCE) +} diff --git a/R/jgi_source_df.R b/R/jgi_source_df.R new file mode 100644 index 0000000..65f68fd --- /dev/null +++ b/R/jgi_source_df.R @@ -0,0 +1,15 @@ +#' Make a source dataframe from JGI proposal file +#' +#' @export + +jgi_source_df <- function(file_path, skip = 27) { + readr::read_lines(file_path, + skip = skip + ) |> + tibble::enframe() |> + dplyr::filter(stringr::str_detect(value, "^\\d+\\. ")) |> + tidyr::separate(value, sep = "\t", into = c("Source_sample", "Source_sample_ID", "Sample_group", "Group ID", "Isotope_label", "SIP_combined_assembly_AP_ID")) |> + tidyr::separate(Source_sample, sep = " ", into = c("SOURCE", "SOURCE_ID")) |> + dplyr::mutate(SOURCE = stringr::str_remove(SOURCE, "\\.")) |> + dplyr::select(SOURCE, source_mat_id = SOURCE_ID, isotope = Isotope_label) +} diff --git a/R/sysdata.rda b/R/sysdata.rda index 96c417ed5c9ad422ebd262e7258843c9bf14fe17..d92b390f4a158487e6f8db3d3699e1c36936f57d 100644 GIT binary patch literal 7195 zcmai0cTf{vmjyzLB!GZQm(algF@(?&2qG*~NThcVRP5`wJNxbI%+Boj&yqR$oSE``Aju75*#Qd0JLwPv!c2v9UO=k|VQqN3Yl31=Apj=Kbn*Uh;= z94!sYaPSj%Keu3QEJcxKz>n$$tZJ zOY-0ofVmD1=#ziN|3bPdm$h|zhP1DkFBg>ynDF2F|8eqIDLzL0TL+l`yXE5gOM?E3 zj-D12v@%b766%V$jKkqixy7iG%s(Sy6S%;CFOA})V=XG2Y`FLn*SNU&6Gj0j>jeG; zu2HTafVpC}bb$jYpUdr+xE}J+rxMqRU+Vo0rq+eym%)#4T!0CI$26vG<>ULN;+MgQ z&!*<1rsguP3esQ^XX&87{xWCp1JI`C^7pwO3q)SYjBdsep4u%eU7d`WQc)2i60<(D$=oNMJx-k}V!-g+;N$(6srxjz*>*L{W$O&j>vn>61Zda2Vskb zgkt8hJq62YaZ78>ZuI9r{!d%s#A{aDr{@>@YJUq6U^8><$c@4sLz&jD{Gm4E)6Xt8rlOt~Z8`JAUx z#$v4&Xy5ZiGokB+c)!2o8IDHAvy+oXx4+;Xig9VGoZz7MlTK}gRT-fMr3MSQ~ZQY|y~ z*Vy&h72jq|zuQ+v+!cGbh0&2ADfh|f#&cfkQj3@tjbWO$k9)))A{}qzWiO<9av{qt zNgr4xyL-hQ4O|?fF6C>n)-!*ey|FD=R}!;g zg!BwZryq$@wV2+Ji_uV-37ZG$QG=_xJG0%zIs7(eq({ zkQs@XkNNoJj!SC=O$Ry25#fFtkRsDJEy|u&vtO5hIsA`0n*muj{LVI9clp%W;};I5 z(ysqwiSIRIk0SuF0qK{rB6vxlB$aTD9S^gf>6$>0K9se4{CPDK1Y>}ZXn0p3sCK%G zCPg#~!BTpvIm~i*D7NN9FD}$Q}^_`;1+zGepG*0v8 z`iYPfyYeu*#6^TUm8_7L`-1`{dX2k|9!8Ax=fKqy^sEEV#TKK;$}n z&dWeBTw7KQNe+>bBuv-88VF%5LJjN)bb6LSaNg^-s%kpal+%{8XjJZ@atJSo^Iz(8 zl_bzL0!aM;N&hxzOBe&r=&AMphoLnMe}@t6MDa^%3MMI}DhkeU$Hqb(9mILV>8C}l z4)X@#@`0*?Nr+Lg6;h9d%;n8H7NV!wi1V}RRd0`^simn>B(cnzdz^_T8~%-)6kZea zm2wOsE;a_+Cfw%_p$oI&U5)Ng`!MoSfsik37E7iT7EyKjo6trPL1T28nHz?5YuS^p z3$T^n)RJi;M@#dKUjaxqkzqS1*5aof?C!PV79w4X8f4 zg|?KUL>yvLAqILviAO52i_5Hyi7Gcl@Sp(B%3a{10o%xnTJSG8qxcbu zh*DsPbN1pk3W_8-2GPD?N$x611<~;y(Vg}S^a6en-AFr0Y=EGR7i5?j0Dv)t&l1~5E|#NDP<#1jOV^_6JboGJ7*`DVj~z2#J?(7=)wb|6Ug1DB(P{# z0>#^;es!yhB!pnp_S7(W@9JAXtMMC?h-J=IA0p((*D zBYT4T-Q0mRw?n?NotcSTYZ4q1#L?Z_Qx7PTOdDzkOPkDw6d6ZuugJ-5EJ(Kmj--E8+c&dt=}D?S?UQ8(2c88$=Pa4>a+{OtFys~mkNC2uWSDFPOr zm6L`~`;sX`M%Q{TqU*FVt;9u|XAl`&5JM>xd1}O=3=P2z4gFcrC5=3&2__FJK~D&R z2-Q{P5vF?4NDz}obiGWuoJQyC9juM`Y2a+z>q3pR57hGU@$_$VDAi#y(0wu0F8<`C z)mmvRhdFky?XEYUA^xc-mLz9zbLl{wv^7&wF}5p4k`W7|3ztEdGi}rTU7dJlrDKM) zaZUTko$pFhCf9DXXwm6W;6gZuLmKH;h_9xDky~3yg@$pBF<~>6pvLG3qeU_l!Jv3G z`p!GD5z8RMFvdc3ZuWpTna37n+#> zEBVHuza);~>zZQPAf(>ug-8!(;wPAYT~wfI0V23Eol{<1y*hxSXppwjLO5*{`{yW~)F168C z8w=}KzXdwHEY>$ZI`7=>*%HxT=?r5~h6kmU^r}#f#YQk3TjqV=pUXbrF1}fDLR-v@ zcdSK)0e1$PihaAafA+SYjiP4+=VbwLXB^kUU!e1GLYHO_^Ugh*OhvACS;-6>2~;@> z;dBTpX^ZO z`mtG+(sxzfaQiJEZkiWrr?!B>3y-9P%<0;*vNDEhf8!A`{dDO@4x5I;wHe+l8yVlE zGlacr_r*4AF?5DQHEVJ4*BxK@mbYOvmP6Oqo_?E8=(g7iW6cv;j}_@vZ6R?W)19pb zM=vSD4yTvD3!trKA<<76lwgi<8%U*r7OKP1%W;0+q7U*Co9AFzVoaa)v}-*egV>NA zUzXk&y(xrEMXXQl?u~rl{*fkNO(%QAA$+z2^Cpc?Aww@#n$1+6v8wjYUi@S$5SXWp z+b{^Fx%-dXmU|G6*#focCe86%h4nieCf}?~9;{cc9Sj>PHh^!T+a+caXoFbNCW6j~ zyj+M+w+7OLL}xzA%Nn*66%bv!99$chi?bJ=qJ)c^q7C6z8;k6wkfD6U_6NYv3(L3b ziCsn-NRzjBZJ|X-B7?Fd+k#sqS1PG)VY>bMMZ#~q9Dp;zUG^v=fOV3=YF)7~hg_+T zT_7^)j1tw1Cfhc*E)0xQeVW}=LmKImfzEDDi}$?Jr89!_`*SJaeH)@Nb{>S3t!9no zkxKYC7xPGBLwPRrqO6(Uy7^FxK+43&!qtVMOX1Ka;#_|Yg7~jDpI-3cREycxO$DdX zxaAehvPk-SLT0Po|p9a z+%XB7ZHOk2uWy6ocEA*&=IfZk;rgEsuB4{2c;id-5BKMNr9YFaAV|O8>qE$^(w9HQ z{!j&`Z;(CLVl#GR)egtZH~E;E-koW*=qvn|YUBsn6*#%1$2nQAS24tE!5d?uf6_>= zIU_CCOF0$rXm&U-ULHT^Xu!$mY@6A&%kC|d9(r$DQ6FhiEo#}LGIv+0e#9jzfofo4 zl6|+u=()l~caQSmdd=bZ7)6~Ng-=e-YAuS^R+m(Hrfp5=!SFXR(WA`XzRmyvpb?>XH4p~c4515sQ6k@0* zfLRpg#?egAs4Lb5X_LMZ=)^Bp?C3cjBG97(o{o+d6=9gR9|rK(CjiE_UG zL^VY>M~q|vLh*uEaFPYTpDF~UkxI^e3zT*lwK3I5wv178%J;8LZjZ`5QfE8}}t|#gDG;&C{G)DEP z`+>BvTuNodVFB~YcEw_8j-=SnnZ zSjasG3#EoD{HiZ3&l&tSb{~g@qIp!@@-PE_KLX0et`wmLn<#RmBp?? zlGkW)n5yoI^3e-WOisLgQQbbeJdLC}EUD+%b|m}3&Jf_AI!*a677x!i-|kmwfBmxJ zpt#LSMV-r`iZ_whyrt0(QB+p+035*C&eep>Q+wii%2s7-mx0pS@hD5fuE9g$;T9pqiqq%oHD-Z_u^o>BQN|5$X(3dsFt1_hMd3Z-`Kz z2SrQb5@EpigoEv~1#D#AtL6ppdg5#SxzJlvz3)s3m`&E27XbB%;w+X;H_@+;UNUGV zI{f0wzW`eM{cs>~iTCx@nbU!99L~9v2e=iW&uOaq+vWeG39%#rC*GK`JtvzP-86Fs`b)Nx-oV!xkk{wD~W7fy#L#Ay93`CbM1T#!`Vy={&%qx z%3_Fg6;H5bhPLBoe?_l1b8#i6Uy^*y$HyndCm;|ycEKw=S-@~e$?pYiKB=(-R&PuM3vz|u*$-NeMCK=0%7+ST`C zzQ6OtU}0e~nM#ZC+jp2BH~)STGd2IRH9`HSwe0o(&MgEMa!=m6gjGir*;1; zMfvR4mw!k3S{(4(cb?w&=ARwynCJRJ7s@Z1%3bMR-M%?u+{Qm0S&H7Ags<7qjIc?b zc)INlX(8+Ep4%vsHX?~z@|bWXyU3ye; zhoeB86f>0r)^*u->;XH_A2efsQVd&@>q?LeTUg@>Z76&=_kuh8aL&9(bz}^by=D4V z95{&f%Gi{TfI>rcVRd(6=+VkffGRl=x|v%JHGFPIj-{N})%DT7+87qLx^V^D8MC)o zr?jf2L)(ZWuU({eEhYe zZ;g&!HU+B~g>XB4E4}>PV&em*`M}5!7cFE0E00ezH($+LtcTC6_VE^IGgRK%o7A#u zgcFdy*PZVF)cJambu_$brk)+DqtYJ0Zy)_wW}}+V_N=E7zp)Wgj1f*sno@>Z1Z+<@ zg+4@Ee$Vs}Ca=lXNTt>xHtgPA9emc&`ukWwR|K%ENq1gUgL+g`p%mN49`vXfm_Fd| z-p=yHUDrT&WXWYKuNr4Q_<3bbzveyzWJVXX5G>`+F0NLgn$TPqle6)6iFwX_m`47x zVj11uH{gXklJ+*2F&+|v0+4VNzsy!TJ1a>B3`UhCbX_8@tG;_zi(e_-+7s9{xRhf0 z^Y1b#I3{yf-2uoubM51v@n%3EcD7k z^}1S{j1g+mlpHK2NGP{I(-LX+5V(1G%N;H^B=AUgW&xyL@)IG!;(Yx=4|r3Y)4vjO z&=GQ>h1`cq4z=~&!S;PKijKD|MMX{tbWseV2|;ydVq=*B1@_q}u_0HIkB>C=28{SU z@W!ynu`_QDqh=C)SCnDPEV&_iYgkwm->?39v#z|LkyzU0TL&VuSJh(LEe=sqLzYRt zfgHrO&GhlIPSeKZlJKKKVVY3TXyGLEN1lMAyGR9G2nB{VEx#p|n%ghPWvYyLdQ*HU zlES6+jiMsn8}@y9-f!WS5~BwO4aw+HnoNGOv8}Cawfv&+19cfi=9!t)6ftyVc$T-O z5Q;lR8Ro5%K32i!5?bhmDtL{`8Xe}31cCUE0B=DwR!>PRe&-pijwLRV(n1y~Jt_3d zJd!Dh`?$_dA8FR_H-)OCkXJ@O3$3P}XkHJH6yDVLIq00^8A41#68B-h8uCbh8Zy6) zLv2;7>3nwX`sNhr_^}2WCEH# zw1|Yw#b22pl0I(8R~pUP(Kf4-(XR!h3!TaS*qV?JaF$vMlX?~?3!BK3Z>g#8L?~I+ zqpL~50a`#g@=NHXY}um=f<3-+FfFrwPT9Ukrg&Q}8#{?Yzf?jgZR|^ib@IE*!uT)N zWJr$HDb+(XlH?R4M9Cog^>1w9%n+=cVc1X9Sf$8Y{~R7IShaIUIlAMt3n;QS)yoi~ z70~S>C^HCz?DwQp(9tTcj#DYt>>CKR7Pq*6i*V?obT5wd<3L8MKG@QIwYlTzd+&Y482%MIWnPp> z8g749yw^$Np0VkN6T=vFN_0@2gDR@TdXseVtcj!i5cnj zyKK%S^YCtDT+WT0FthW2FhTKY8)l=7$!HTVDdVQ=zWu`}ZjB?C0n7)jDwfsA!j&+d+D#}R;M8-A!xBw4Iafz()9>qGX9w}MlCmR07t-pQ! zxD~u=PhVearYYen7(n(t#^Y7^Fsrj=%IgYuTeUL0z|yy&2JbHLL?dX9t#vIx>S>(l zhgR^50NgFhlxQcZ2H@4{Q^=ti$@lc@$tH?oPTB+%Qe@`OL;L8NV~XAl$U*c)s*o2w{f)*>Oqk3qv8Ch zpTlV_+$ZGO&cGLucL|ileC`vk<}urt*}*4)JUZR09}sMy(H}s?a9He8r~aFio*4;+ zo-+PL^S(nl+;f5M89S|!R!I{d*9R1pvo&Tv_9jEjisJI4MF<12=Y?MEZ617O`B`bRLWeAGFfcGLRZwm zDsFvM0AEi778U`t@@uxW2zBYT{~wGHd<2HFsx6Ppndoou9_E+XoN+vx)Cv#%dKtfJ$7Y zUa{YYP)Ccbm+C;S7#@q=S%_HYAEkw+%P3&-Ghg9bp9J9NC;q^0Tm`7T@SOyTpacV{ z`jLkuc^;=q>aQ(}$-D9~VDd4CPJ#hP9P7n+)Y?Cmm(}`8pXYuS?J(LGqJld1G`7*! z63~z)yuyqNN1S@P(l4^3ww<5cYS4W>z*779xyFdNII8 Ywu8*^fv4Pm?zAWtsoU@ycsVEh2cvmq>i_@% literal 149 zcmV;G0BZk2T4*^jL0KkKSuD;CsQ>_Le}Mn6zyJsUf4~d@kU+a9%m4rZAOLbJ)fxZ* z4H_B%^wbeaC#c$u02(v^0Fg=#JMyrF`FH9#yGUq2j!TXOd1g<$RYqt-2(U0iC>cNr z&e<2W4Tx6kbd7e+fZ809)@Hr6#cau?!0a2%X$1ujNgqey9Y*?7%0Cx!ML1B9EY1z7 DkJLGN diff --git a/_pkgdown.yml b/_pkgdown.yml index 85b4ba6..56401f4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -62,6 +62,12 @@ reference: - plot_EAF_values - plot_growth_values +- title: Spike-In Controls + contents: + - jgi_source_df + - jgi_sample_df + - jgi_feature_df + - title: Datasets contents: - example_source_df diff --git a/data-raw/example_qsip_data.R b/data-raw/example_qsip_data.R index 6eb0f7c..1c26a80 100644 --- a/data-raw/example_qsip_data.R +++ b/data-raw/example_qsip_data.R @@ -118,6 +118,22 @@ isotope_palette = c( "16O" = "#037bcf", "18O" = "#ff0000" ) +# JGI +jgi_mixes <- readxl::read_excel("/Users/kimbrel1/Library/CloudStorage/OneDrive-LLNL/Documents/Soils_SFA/analysis/qSIP_refactor/JGI_spike_ins/JGI_mixes.xlsx") |> + tidyr::pivot_longer( + cols = tidyr::ends_with("stoichiometry"), + names_to = "MIX", + values_to = "STOICHIOMETRY" + ) |> + tidyr::drop_na(STOICHIOMETRY) |> + dplyr::mutate( + MIX = stringr::str_remove(MIX, "_stoichiometry"), + MIX = stringr::str_remove(MIX, "MIX_") + ) |> + dplyr::mutate(RATIO = STOICHIOMETRY / sum(STOICHIOMETRY), .by = MIX) |> + dplyr::rename(feature_id = ID) |> + dplyr::arrange(MIX) + # save usethis::use_data(example_source_df, overwrite = TRUE) usethis::use_data(example_sample_df, overwrite = TRUE) @@ -135,3 +151,4 @@ usethis::use_data(example_qsip_growth_t0, overwrite = TRUE) usethis::use_data(example_group_dataframe, overwrite = TRUE) usethis::use_data(isotope_palette, overwrite = TRUE, internal = TRUE) +usethis::use_data(jgi_mixes, overwrite = TRUE, internal = TRUE) diff --git a/man/jgi_feature_df.Rd b/man/jgi_feature_df.Rd new file mode 100644 index 0000000..aaa30fe --- /dev/null +++ b/man/jgi_feature_df.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jgi_feature_df.R +\name{jgi_feature_df} +\alias{jgi_feature_df} +\title{Make a feature dataframe from a JGI coverage file} +\usage{ +jgi_feature_df(coverage_file) +} +\description{ +Make a feature dataframe from a JGI coverage file +} diff --git a/man/jgi_sample_df.Rd b/man/jgi_sample_df.Rd new file mode 100644 index 0000000..cbefa06 --- /dev/null +++ b/man/jgi_sample_df.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jgi_sample_df.R +\name{jgi_sample_df} +\alias{jgi_sample_df} +\title{Make a sample dataframe from JGI proposal file} +\usage{ +jgi_sample_df(file_path, skip = 27) +} +\description{ +Make a sample dataframe from JGI proposal file +} diff --git a/man/jgi_source_df.Rd b/man/jgi_source_df.Rd new file mode 100644 index 0000000..a7e4692 --- /dev/null +++ b/man/jgi_source_df.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jgi_source_df.R +\name{jgi_source_df} +\alias{jgi_source_df} +\title{Make a source dataframe from JGI proposal file} +\usage{ +jgi_source_df(file_path, skip = 27) +} +\description{ +Make a source dataframe from JGI proposal file +}