This repository has been archived by the owner on Sep 30, 2024. It is now read-only.
forked from HusseinLakkis01/scCoAnnotate
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.config.yml
66 lines (60 loc) · 2.84 KB
/
example.config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Example config for the scCoAnnotate annotation and benchmarking workflows
# Target directory: where the annotation workflow writes its output
output_dir: <output directory for the annotation workflow>
### Description of some non-tool-specific parameters
# Each entry under `references` describes one reference dataset.
# NOTE(review): the nesting below was reconstructed from the key order and the
# comments - confirm it against the workflow's expected config layout.
references:
  <reference_name>:
    expression: <path to counts>
    labels: <path to labels>
    output_dir_benchmark: <output directory for the benchmarking workflow>
    # Convert gene symbols in the reference from mouse to human.
    # Accepted values: True, False
    convert_ref_mm_to_hg: False
    # The ontology allows specifying different levels of label granularity.
    # These parameters are optional - delete if unused.
    ontology:
      # Path to the csv containing the ontology. Each column represents a
      # different granularity of labels. The columns should be named.
      ontology_path: <path to ontology.csv>
      # The column name(s) of the granularity to use, from the ontology file.
      # This parameter can take multiple column names, so they should be
      # given as a list (ex: ['level'] or ['level1', 'level2']).
      ontology_column: <ontology_column to use>
    # Some references are too big to be used efficiently;
    # the following parameters allow downsampling the reference.
    downsample:
      # The number of cells to downsample to.
      # If the value is > 1, it specifies the number of cells to select
      # (ex: 500 will select 500 cells).
      # If the value is < 1, it is interpreted as a fraction of cells to keep
      # (ex: 0.25 will select 25% of the cells).
      value: 500
      # Should the sample keep the same stratification as the complete dataset?
      # Accepted values: True, False
      stratified: True
    # The minimal number of cells that each cluster should have in the
    # reference. Clusters with fewer cells will be filtered out of the reference.
    min_cells_per_cluster: 100
# Paths to the query datasets (cell x gene raw counts), keyed by query name
query_datasets:
  <query_name_1>: <path to counts 1>
  <query_name_2>: <path to counts 2>
  <query_name_3>: <path to counts 3>
# Classifiers to run (one list item per tool)
tools_to_run:
  - tool1
  - tool2
# Consensus method
consensus:
  tools:
    - 'all'
  type:
    majority:
      # Minimum agreement to use, given as a list (ex: [2] or [2,3,4])
      min_agree: <minimum agreement to use>
    CAWPE:
      # CAWPE mode(s), given as a list
      # (ex: ['CAWPE_T'] or ['CAWPE_T','CAWPE_CT'])
      mode: <CAWPE MODE>
      # Alpha value(s), given as a list (ex: [4] or [2,3,4])
      alpha: <alpha value>
# Benchmark parameters
benchmark:
  n_folds: <number of folds to use in the benchmarking>