Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ERROR: cannot convert a value to missing for assignment #146

Open
melchuawy opened this issue Nov 11, 2024 · 1 comment
Open

ERROR: cannot convert a value to missing for assignment #146

melchuawy opened this issue Nov 11, 2024 · 1 comment

Comments

@melchuawy
Copy link

Python code:

#!/usr/bin/env python3

import argparse
import csv
import os
import sys
import numpy as np
import pandas as pd
import scipy.sparse as sparse
import scipy.io as sio
import subprocess



def main():
    # Parse input arguments.
    args = parse_args()

    # Check for existence of input file.
    if (not os.path.exists(args.baysor)):
        print("The specified Baysor output (%s) does not exist!" % args.baysor)
        sys.exit(0)

    # Check if output folder already exist.
    if (os.path.exists(args.out)):
        print("The specified output folder (%s) already exists!" % args.out)
        sys.exit(0)


    # Read 5 columns from transcripts Parquet file
    transcripts_df = pd.read_csv(args.baysor,
                                 usecols=["gene",
                                          "cell",
                                          "assignment_confidence"])

    # Find distinct set of features.
    features = np.unique(transcripts_df["gene"])

    # Create lookup dictionary
    feature_to_index = dict()
    for index, val in enumerate(features):
        feature_to_index[str(val)] = index

    # Find distinct set of cells. Discard the first entry which is 0 (non-cell)
    cells = np.unique(transcripts_df["cell"])[1:]


    # Create a cells x features data frame, initialized with 0
    matrix = pd.DataFrame(0, index=range(len(features)), columns=cells, dtype=np.int32)


    # Iterate through all transcripts
    for index, row in transcripts_df.iterrows():
        if index % args.rep_int == 0:
            print(index, "transcripts processed.")

        feature = str(row['gene'])
        cell = row['cell']
        conf = row['assignment_confidence']

        # Ignore transcript below user-specified cutoff
        if conf < args.conf_cutoff:
            continue

        # If cell is not 0 at this point, it means the transcript is associated with a cell
        if cell != 0:
            # Increment count in feature-cell matrix
            matrix.at[feature_to_index[feature], cell] += 1

    # Call a helper function to create Seurat and Scanpy compatible MTX output
    write_sparse_mtx(args, matrix, cells, features)



#--------------------------
# Helper functions

def parse_args():
    """Parses command-line options for main()."""
    summary = 'Map Xenium transcripts to Baysor segmentation result. \
               Generate Seurat/Scanpy-compatible feature-cell matrix.'

    parser = argparse.ArgumentParser(description=summary)
    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument('-baysor',
                               required = True,
                               help="The path to the *segmentation.csv file produced " +
                                    "by Baysor.")
    requiredNamed.add_argument('-out',
                               required = True,
                               help="The name of output folder in which feature-cell " +
                                    "matrix is written.")
    parser.add_argument('-conf_cutoff',
                        default='0.9',
                        type=float,
                        help="Ignore transcripts with assignment confidence " +
                             " below this threshold. (default: 20.0)")
    parser.add_argument('-rep_int',
                        default='100000',
                        type=int,
                        help="Reporting interval. Will print message to stdout " +
                             "whenever the specified # of transcripts is processed. " +
                             "(default: 100000)")

    try:
        opts = parser.parse_args()
    except:
        sys.exit(0)

    return opts



def write_sparse_mtx(args, matrix, cells, features):
    """Write feature-cell matrix in Seurat/Scanpy-compatible MTX format"""

    # Create the matrix folder.
    os.mkdir(args.out)

    # Convert matrix to scipy's COO sparse matrix.
    sparse_mat = sparse.coo_matrix(matrix.values)

    # Write matrix in MTX format.
    sio.mmwrite(args.out + "/matrix.mtx", sparse_mat)

    # Write cells as barcodes.tsv. File name is chosen to ensure
    # compatibility with Seurat/Scanpy.
    with open(args.out + "/barcodes.tsv", 'w', newline='') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t', lineterminator='\n')
        for cell in cells:
            writer.writerow(["cell_" + str(cell)])

    # Write features as features.tsv. Write 3 columns to ensure
    # compatibility with Seurat/Scanpy.
    with open(args.out + "/features.tsv", 'w', newline='') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t', lineterminator='\n')
        for f in features:
            feature = str(f)
            if feature.startswith("NegControlProbe_") or feature.startswith("antisense_"):
                writer.writerow([feature, feature, "Negative Control Probe"])
            elif feature.startswith("NegControlCodeword_"):
                writer.writerow([feature, feature, "Negative Control Codeword"])
            elif feature.startswith("BLANK_"):
                writer.writerow([feature, feature, "Blank Codeword"])
            else:
                writer.writerow([feature, feature, "Gene Expression"])

    # Seurat expects all 3 files to be gzipped
    subprocess.run("gzip -f " + args.out + "/*", shell=True)



if __name__ == "__main__":
    main()

ERROR: cannot convert a value to missing for assignment
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:35
  [2] nonmissingtype_checked(T::Type)
    @ Base ./missing.jl:44
  [3] convert(::Type{Missing}, x::Int64)
    @ Base ./missing.jl:70
  [4] setindex!(A::Vector{Missing}, x::Int64, i1::Int64)
    @ Base ./array.jl:1021
  [5] _replace!
    @ ./set.jl:798 [inlined]
  [6] replace_pairs!
    @ ./set.jl:616 [inlined]
  [7] replace!(A::Vector{Missing}, old_new::Pair{Missing, Int64}; count::Int64)
    @ Base ./set.jl:606
  [8] replace!
    @ ./set.jl:606 [inlined]
  [9] parse_prior_assignment(prior_segmentation::Vector{Int64}; min_molecules_per_segment::Int64, unassigned_label::Int64)
    @ Baysor.DataLoading /home/viktor_petukhov/.julia/dev/Baysor/src/data_loading/cli_wrappers.jl:14
 [10] load_prior_segmentation!(path::String, df_spatial::DataFrames.DataFrame; min_molecules_per_segment::Int64, min_molecules_per_cell::Int64, estimate_scale::Bool, unassigned_label::String)
    @ Baysor.DataLoading /home/viktor_petukhov/.julia/dev/Baysor/src/data_loading/cli_wrappers.jl:255
 [11] load_prior_segmentation!(df_spatial::DataFrames.DataFrame, prior_segmentation::String, opts::Baysor.Utils.SegmentationOptions; min_molecules_per_cell::Int64, min_molecules_per_segment::Int64, plot::Bool)
    @ Baysor.CommandLine /home/viktor_petukhov/.julia/dev/Baysor/src/cli/main.jl:195
 [12] run(coordinates::String, prior_segmentation::String; config::Baysor.Utils.RunOptions, x_column::Symbol, y_column::Symbol, z_column::Symbol, gene_column::Symbol, min_molecules_per_cell::Int64, scale::Float64, scale_std::String, n_clusters::Int64, prior_segmentation_confidence::Float64, output::String, plot::Bool, polygon_format::String, count_matrix_format::String)
    @ Baysor.CommandLine /home/viktor_petukhov/.julia/dev/Baysor/src/cli/main.jl:108
 [13] command_main(ARGS::Vector{String})
    @ Baysor /home/viktor_petukhov/.julia/packages/Comonicon/F3QqZ/src/codegen/julia.jl:343
 [14] command_main
    @ /home/viktor_petukhov/.julia/packages/Comonicon/F3QqZ/src/codegen/julia.jl:90 [inlined]
 [15] julia_main()
    @ Baysor /home/viktor_petukhov/.julia/packages/Comonicon/F3QqZ/src/frontend/cast.jl:481
 [16] top-level scope
    @ none:1
@VPetukhov
Copy link
Collaborator

@melchuawy , could you provide a minimal reproducible example and explain what you're actually trying to do?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants