# config.yaml

# ======================== #
# bincs configuration file #
# ======================== #


# -------
# General
# -------
# Settings shared by every target.
#
# Directory in which the intermediate and final results are stored. It is
# created if it does not already exist.
# (Required)
prefix: '/mnt/scratch/projects/epipp'

# When set, Snakemake refuses to start unless it is run through the tmux
# session manager.
# (Not required)
tmux: true


# -------------
# Sample Sheets
# -------------
# A sample sheet lists the sequence files to use, the type of each file
# (ChIP/Input) and the experimental condition it belongs to.
#
# The main sample sheet.
# (Required)
sample_sheet: 'sample_sheet_fixed.txt'

# Sample sheet of external controls, for example the same ChIP performed in a
# different species. Aligning these files to the genome in use shows which
# bins collect many reads through non-specific binding. Such bins are
# candidates for blacklisting and are tested according to a Poisson model.
# (Not required)
external_control_sample_sheet: 'sample_sheet_controls.txt'


# -----------------------
# Genomes And Annotations
# -----------------------
# Which genomes and annotations to use.
#
# UCSC name of the genome to use. It is mostly used to fetch the genome and
# the chromosome sizes automatically.
# (Required)
genome: 'hg38'


# ----------------------
# Gene Overlap Barcharts
# ----------------------
# Settings for the gene overlap barcharts.
#
# Radius around the TSS/TES that is considered part of the TSS/TES. E.g. if
# 1000, any position within 1000 nucleotides of the TSS/TES is considered to
# be part of it.
# (Not required)
barchart_tss_length: 3000


# ---------
# Alignment
# ---------
# The options in this section only matter when the input filetype is fastq.
#
# Prefix of the hisat2 genome index used to align the fastqs to the genome.
# Premade indexes can be found at
# ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data
# The index is not used when the input is not fastq.
# (Not required)
hisat2_index_prefix: '/mnt/cargo/genomes/hisat2/hg38/genome'

# Extra flags handed to hisat2.
# (Not required)
hisat2_extra_flags: '--no-spliced-alignment -k 1 --no-discordant --no-mixed'

# Presumably controls whether multi-aligning reads are kept after alignment.
# NOTE(review): this description used to say that setting the flag *removes*
# multi-aligning reads and identified them by an NH:i field of 1. That
# contradicts the key name, and in SAM an NH:i value of 1 marks a uniquely
# aligned read (multi-aligning reads have NH:i > 1). Confirm the actual
# behaviour against the workflow.
# (Not required)
keep_multi_aligning_reads: true

# List of adapters to remove from the fastqs. Currently unset (null).
# (Not required)
adapters: null

# Minimum length a read must have after adapter removal.
# (Not required)
min_read_length: 14


# ----------
# Sequencing
# ----------
# Settings describing the sequenced reads.
#
# Whether the reads are paired end.
# (Required)
paired_end: false

# Estimated size of the sequenced fragments. Currently unset (null).
# NOTE(review): marked as required yet left empty while `fragment_lengths`
# is given - presumably only one of the two is needed; confirm.
# (Required)
fragment_length: null

# Headerless two-column file with the estimated size of the sequenced
# fragments per group: column 1 holds the group names, column 2 the fragment
# sizes.
# (Required)
fragment_lengths: 'private/epipp/frag_sizes.txt'

# Minimum fastq quality required.
# (Not required)
fastq_quality: 20


# -------------------------
# Heatmaps And Profileplots
# -------------------------
# Settings for heatmaps and profileplots.
#
# Gzipped gencode annotation file from which the regions plotted in heatmaps
# and profileplots are taken. A path starting with www/http/ftp is downloaded;
# any other path is interpreted as local. Annotation of this quality only
# exists for humans and mice. For any other genome, the appropriate regions
# are downloaded from UCSC refGene instead.
# (Not required)
annotation_gff3: 'ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_27/gencode.v27.annotation.gff3.gz'

# How to sort the heatmaps. Only applies when sort_order_group is not used.
# Options: keep, no, descend, ascend. `keep` retains the sort order of the
# regions and gives all the groups the same sort order; descend/ascend sort
# each group according to the signal.
# Keep the value quoted: an unquoted `no` is read as a boolean, not as a
# string, by YAML 1.1 loaders.
# (Not required)
sort_order: 'keep'

# When given, all heatmaps are sorted according to the sort order of this
# group, which makes it easy to compare heatmaps between groups. When not
# given, each heatmap is sorted individually. Currently unset (null).
# (Not required)
sort_order_group: null

# Group vs group heatmaps only! When given, all group vs group heatmaps are
# sorted according to the sorting for sort_order_group vs
# second_sort_order_group, which makes it easy to compare heatmaps between
# groups. When not given, each heatmap is sorted individually. Currently unset
# (null).
# (Not required)
second_sort_order_group: null

# Distance shown upstream and downstream of any region with 'gene' in the
# name in the heatmaps and profileplots.
# (Not required)
tss_distance_gene: 3000

# Distance shown upstream and downstream of any region without 'gene' in the
# name in the heatmaps and profileplots.
# (Not required)
tss_distance_other: 500

# `regions` is a list of the regions to plot in the heatmaps and profileplots.
# The following regions are valid: 'CDS', 'exon', 'five_prime_UTR', 'gene',
# 'start_codon', 'stop_codon', 'stop_codon_redefined_as_selenocysteine',
# 'three_prime_UTR', 'transcript', 'internal_exon'.
# Using it requires that either the configuration variable
# `remote_annotation_gff3` or `local_annotation_gff3` is defined.
# NOTE(review): neither variable is visible in this configuration (only
# `annotation_gff3` is), and 'tss' below is not among the regions listed as
# valid - confirm both against the workflow.
# (Not required)
regions:
  - 'exon'
  - 'gene'
  - 'internal_exon'
  - 'tss'

# `custom_regions` is a map between region names and the bed-files used to
# define the custom regions. Custom region files can also be used to draw
# different lines and colors for different regions in the same plot, by adding
# a seventh column called deepTools_group to the file. If used, the file must
# be sorted on the column deepTools_group.
# No custom regions are configured. The value is written as `null` because a
# bare `None` is not a YAML null: it loads as the string "None".
# (Not required)
custom_regions: null


# -----------------------
# Differential Enrichment
# -----------------------
# Settings for the differential enrichment analysis (limma etc.).
#
# Contrasts to test for differential enrichment between groups. The names used
# must be the same as the column names in the design_matrix. By default each
# group in the design matrix is tested against the others in pairs. By
# default, a design matrix with an intercept is used in which each group in
# the sample sheet is a column; the command used is
# model.matrix(~0 + groups).
# NOTE(review): in an R formula `~0 +` drops the intercept, so "with an
# intercept" may be a slip in the description - confirm.
# Unset here. Written as `null` because a bare `None` is not a YAML null: it
# loads as the string "None".
# (Not required)
contrasts: null

# The file used to describe the experimental setup. Unset here; `null` is used
# instead of `None` for the same reason as for `contrasts`.
# (Not required)
design_matrix: null


# ------------
# Peak Calling
# ------------
# Settings for peak calling.
#
# Peak callers used to find peaks. Valid values are epic and macs2.
# (Not required)
peak_callers:
  - 'epic'

# Value of the epic gaps-allowed parameter.
# (Not required)
epic_gap: 3

# Value of the epic window-size parameter.
# (Not required)
epic_window_size: 200

# The macs2 --broad parameter. Currently unset (null).
# (Not required)
macs2_broad: null

# Any further flags to pass to macs2. Currently unset (null).
# (Not required)
macs2_extra_flags: null

# Window size csaw should use.
# (Not required)
csaw_window_size: 200

# Minimum count sum across libraries that a bin needs in order to be included
# in the analysis.
# (Not required)
csaw_filter: 1