Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add multiome check script #147

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions tools/scripts/compare_gex_h5ad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import anndata
import sys
def load_anndata(file_path):
# Load an AnnData object from the specified H5AD file
return anndata.read_h5ad(file_path)
def main():
# Check if the correct number of command-line arguments is provided
if len(sys.argv) != 3:
print("Usage: python script.py <test_h5ad_file> <truth_h5ad_file>")
sys.exit(1)
# Get the input file paths from command-line arguments
test_h5ad_path = sys.argv[1]
truth_h5ad_path = sys.argv[2]
# Load AnnData objects from the H5AD files
test = load_anndata(test_h5ad_path)
truth = load_anndata(truth_h5ad_path)
# Print some information about the loaded AnnData objects (optional)
print("Loaded test AnnData:")
print(test_h5ad_path)
print(test)
print("\nLoaded truth AnnData:")
print(truth)
print(truth_h5ad_path)
# Now you can use the 'test' and 'truth' AnnData objects as needed in your script
# Do comparisons between test and truth obs
print("Comparing test cell metrics to truth cell metrics using truth as ref")
command1="test.obs."
command2="truth.obs."
for x in truth.obs.columns:
z = eval(command1+x)
y = eval(command2+x)
if z.equals(y)==False:
print("Cell Metric Column does not match:")
print(x)
print("Sum of test: ")
print(z.sum())
print("Sum of truth: ")
print(y.sum())
print("Comparing test gene metrics to truth gene metrics using truth as ref")
command3="test.var."
command4="truth.var."
for x in truth.var.columns:
z = eval(command3+x)
y = eval(command4+x)
if z.equals(y)==False:
print("Gene Metric Column does not match:")
print(x)
print("Making gene_names unique")
test.var_names_make_unique()
truth.var_names_make_unique()
for x in truth.var.columns:
z = eval(command3+x)
y = eval(command4+x)
if z.equals(y)==False:
print("Gene metric does not match after making gene names unique")
print(x)
print("Done")
print("If no warning above Done, gene metrics match now that they are unique")
print("Testing for new obs columns in test data set:")
for x in test.obs.columns:
if x not in truth.obs.columns:
print("Column not in truth", x)
print("Done")
print("If no warning above Done, no new obs columns in test matrix")
print("Testing for new var columns in test data set:")
for x in test.var.columns:
if x not in truth.var.columns:
print("Column not in truth", x)
print("Done")
print("If no warning above Done, no new var columns in test matrix")
print("Testing matrix count sums")
if test.X.sum()==truth.X.sum():
print("Counts match")
else:
print("Counts do not match")
print("Testing that multimapped_reads + unique_reads = n_reads in test")
if test.obs.n_reads.sum()==(test.obs.reads_mapped_multiple.sum()+test.obs.reads_mapped_uniquely.sum()):
print("N_reads = reads_mapped_multiple+unique")
else:
print("Sums of unique, multimapped do not equal n_reads")
print("Sums of unqie + multimapped: ")
print(test.obs.reads_mapped_multiple.sum()+test.obs.reads_mapped_uniquely.sum())
print("Sums of n_reads: ")
print(test.obs.n_reads.sum())
print("Sum of unique: ")
print(test.obs.reads_mapped_uniquely.sum())
print("Sum of multimapped: ")
print(test.obs.reads_mapped_multiple.sum())
if test.obs.n_reads.sum()==(test.obs.reads_mapped_multiple.sum()+test.obs.reads_mapped_uniquely.sum()+test.obs.reads_mapped_intergenic.sum()):
print("N_reads = reads_mapped_multiple+unique+intergenic")
else:
print("Sums of unique, multimapped, intergenic do not equal n_reads")
if __name__ == "__main__":
main()
Loading