"""
Main variables used in the workflow ---------------------------------------------------------------
"""
# Retrieving the list of haplotypes (SAMPLES) in data/haplotypes.
# Only files with a .fa extension will be used (.fna files are ignored).
SAMPLES = [
os.path.basename(file).split('.fa')[0]
for file in os.listdir("data/haplotypes/")
if re.search(r"\.fa", file)
]
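# Example (filename illustrative): "sampleA.hap1.fa.gz" yields the sample name "sampleA.hap1"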
# Retrieving the list of haplotypes excluding the reference
SAMPLES_NOREF = [
sample
for sample in SAMPLES
if sample != os.path.basename(config['reference']).split('.fa')[0]
]
# Retrieving the list of chromosome names from the headers of the reference FASTA
with gzip.open("data/haplotypes/"+config['reference'], "r") as handle:
CHRLIST = [line.decode().split("#")[-1].split('\n')[0] for line in handle.readlines() if line.decode()[0] == ">"]
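# Example (header illustrative): a PanSN-style record ">sampleA#1#chr01" yields the chromosome name "chr01"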
# Adding optional outputs to the default targets, based on config.yaml
analysis_inputs = [
"output/stats/pan1c."+config['name']+".core.stats.tsv", # core stats
expand("output/panacus.reports/"+config['name']+".{chromosome}.histgrowth.html", chromosome=CHRLIST), # panacus histgrowth
expand("output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png", chromosome=CHRLIST), # visualizations from odgi on chromosome graphs
"output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv" # chromosome graph statistics
]
if config["get_PAV"] == "True": # Adding PAV matrix creation
analysis_inputs.append("output/pav.matrices")
if config["get_ASMs_SyRI"] == "True": # Creating SyRI figures for each input assembly
analysis_inputs.append(
expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF)
)
if config["get_chrInputs_SyRI"] == "True": # Creating SyRI figures for each PGGB input
analysis_inputs.append(
expand("output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png", chromosome=CHRLIST)
)
if config["run_Quast"] == "True": # Running Quast on input haplotypes
analysis_inputs.append("output/quast/"+config['name']+".quast.report.html")
if config["get_contig_pos"] == "True": # Figures decomposing each chromosome into its contigs
analysis_inputs.append(
expand("output/chr.contig/{chromosome}.contig.png", chromosome=CHRLIST)
)
if config["create_report"] == "True": # Creating the report (needs the contig figures)
analysis_inputs.append(
"output/pan1c."+config['name']+".report.md"
)
"""
Rules -------------------------------------------------------------------------------------------
"output/pan1c."+config['name']+".gfa", # Final graph (main output)
"output/pan1c."+config['name']+".gfa.metadata", # Metadata for the final (also in top of gfa files as # line)
"""
Pre-processing section : preparing pggb inputs ---------------------------------------------------
"""
rule ragtag_scaffolding:
# Scaffold input haplotype against the reference to infer chromosome scale sequences
input:
ref="data/haplotypes/"+config['reference'],
reffai="data/haplotypes/"+config['reference']+".fai",
refgzi="data/haplotypes/"+config['reference']+".gzi",
fa="data/haplotypes/{haplotype}.fa.gz"
output:
fa=temp("data/hap.ragtagged/{haplotype}.ragtagged.fa"),
tar="data/hap.ragtagged/{haplotype}.tar.gz"
threads: 8
retries: 1
log:
cmd="logs/ragtag/{haplotype}.ragtag.cmd.log",
time="logs/ragtag/{haplotype}.ragtag.time.log"
params:
app_path=config["app.path"]
shell:
"""
/usr/bin/time -v -o {log.time} \
bash scripts/ragtagChromInfer.sh \
-d $(dirname {output.fa})/{wildcards.haplotype} \
-a {params.app_path} \
-t {threads} \
-r {input.ref} \
-q {input.fa}
#if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
# echo "Error : Empty final fasta"
# exit 1
#fi
"""
rule quast_stats:
# Run Quast on the input haplotypes
input:
fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
ref="data/haplotypes/"+config['reference']
output:
report="output/quast/"+config['name']+".quast.report.html"
threads: 8
params:
app_path=config["app.path"],
pan_name=config["name"]
cmd="logs/quast/quast.cmd.log",
time="logs/quast/quast.time.log"
/usr/bin/time -v -o {log.time} \
apptainer run {params.app_path}/pan1c-env.sif quast.py \
-t {threads} \
-o "$(dirname {output.report})" \
-r {input.ref} \
--plots-format png \
--no-read-stats \
--no-icarus \
-s \
--large \
{input.fas} 2>&1 | \
tee {log.cmd}
# Compressing temporary files
apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-@ {threads} $(dirname {output.report})/contigs_reports/*.fa
apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
mv $(dirname {output.report})/report.html {output.report}
"""
rule contig_position:
# Produce figures with contig positions
input:
fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
output:
fig="output/chr.contig/{chromosome}.contig.png",
outdir=directory("output/chr.contig/{chromosome}")
threads: 1
params:
app_path=config["app.path"]
shell:
"""
mkdir {output.outdir}
base="{output.outdir}"
hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
# Creating .bands file
apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
-s 5 \
-f {input.fa} \
> $base/{wildcards.chromosome}.gff
awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
> $base/{wildcards.chromosome}.bands
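# Resulting .bands columns: chr, start, end, contig_<n>, gieStain (alternating gpos25/gpos75 shading)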
# Creating bed like file
cat {input.fai} | \
cut -f1,2 | tr '\\t' ',' | \
sed "s/,/,1,/" | \
sed "1i chr\\tstart\\tend" | \
tr ',' '\\t' \
> $base/{wildcards.chromosome}.tsv
# Creating figure
apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
-b $base/{wildcards.chromosome}.bands \
-t $base/{wildcards.chromosome}.tsv \
-o {output.fig}
"""
rule chromosome_clustering:
# Read ragtagged FASTAs and split chromosome sequences into their corresponding per-chromosome FASTA files
input:
expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
output:
temp(expand('data/chrInputs/'+config['name']+'.{chromosome}.fa', chromosome=CHRLIST))
params:
app_path=config["app.path"],
pan_name=config["name"]
shell:
"""
mkdir -p $(dirname {output[0]})
apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
--fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
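# inputClustering.py groups the ragtagged sequences by chromosome name and writes one multi-FASTA per chromosome, prefixed with the pangenome name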
"""
rule SyRI_on_ASM:
# Run SyRI on a single assembly.
# The assembly is mapped onto the 'reference' and SyRI searches for structural variants (SVs).
input:
ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
output:
fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
wrkdir=directory('data/asm.syri/{haplotype}')
log:
cmd="logs/SyRI_ASM/{haplotype}.SyRI_ASM.cmd.log",
time="logs/SyRI_ASM/{haplotype}.SyRI_ASM.time.log"
threads: 4
retries: 1
resources:
mem_gb=48
params:
app_path=config["app.path"]
shell:
"""
mkdir -p {output.wrkdir}
/usr/bin/time -v -o {log.time} \
bash scripts/getSyriFigs.sh \
-a {params.app_path} \
-t {threads} \
-d {output.wrkdir} \
-o $(basename {output.fig}) \
-r {input.ref}
mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
"""
"""
Core section : Running PGGB
"""
rule SyRI_on_chrInput:
# Run SyRI on a PGGB chromosome input, comparing the haplotypes of that chromosome to the reference
input:
fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
output:
fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
params:
app_path=config["app.path"],
ref=config['reference']
shell:
"""
mkdir {output.wrkdir}
refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1,2)
## Creating single fasta from multifasta
zcat {input.fasta} | awk -F"#" \
'/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
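# The awk above writes each FASTA record to "<wrkdir>/<sequence name>.fa", i.e. one file per haplotype for this chromosome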
apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-@ {threads} {output.wrkdir}/*.fa
asm="$(basename $file .fa.gz | cut -f1,2 -d"#" | sed 's/#/\\.hap/').fa.gz"
bash scripts/getSyriFigs.sh \
-a {params.app_path} \
-t {threads} \
-d {output.wrkdir} \
-o $(basename {output.fig})
mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
rm {output.wrkdir}/*.fa
"""
rule wfmash_on_chr:
# Run wfmash on a specific chromosome input
input:
fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
output:
mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
threads: 16
params:
app_path=config['app.path'],
segment_length=config['wfmash.segment_length'],
mapping_id=config['wfmash.mapping_id'],
wfmash_sec=config['wfmash.secondary']
log:
cmd_map="logs/pggb/{chromosome}.wfmash_mapping.cmd.log",
time_map="logs/pggb/{chromosome}.wfmash_mapping.time.log",
cmd_aln="logs/pggb/{chromosome}.wfmash_aln.cmd.log",
time_aln="logs/pggb/{chromosome}.wfmash_aln.time.log",
shell:
"""
## Mapping
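# wfmash flags: -s segment length, -l block length (5x the segment length), -p minimum mapping identity,
# -n mappings retained per segment (set to the number of sequences in the .fai); --approx-map runs the mapping step only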
/usr/bin/time -v -o {log.time_map} \
apptainer exec {params.app_path}/pggb.sif wfmash \
-s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
--tmp-base $(dirname {output.aln}) \
{input.fa} --approx-map \
1> {output.mapping} \
2> >(tee {log.cmd_map} >&2)
## Aligning
/usr/bin/time -v -o {log.time_aln} \
apptainer exec {params.app_path}/pggb.sif wfmash \
-s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
--tmp-base $(dirname {output.aln}) \
{input.fa} -i {output.mapping} \
--invert-filtering \
1> {output.aln} \
2> >(tee {log.cmd_aln} >&2)
# Compressing
apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-@ {threads} -k {output.mapping}
apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-@ {threads} -k {output.aln}
"""
rule seqwish:
# Run seqwish on the alignment produced by wfmash
input:
fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
aln=rules.wfmash_on_chr.output.aln
output:
temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
threads: 8
params:
app_path=config['app.path'],
seqwish=config['seqwish.params']
log:
cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
time="logs/pggb/{chromosome}.seqwish.time.log"
shell:
"""
/usr/bin/time -v -o {log.time} \
apptainer exec {params.app_path}/pggb.sif seqwish \
-s {input.fa} -p {input.aln} -g {output} \
{params.seqwish} -t {threads} \
--temp-dir $(dirname {output}) -P 2>&1 | \
tee {log.cmd}
# Compressing
apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-@ {threads} -k {output}
"""
rule gfaffix_on_chr:
# Run gfaffix on seqwish graph
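# GFAffix collapses redundant, walk-preserving shared affixes between nodes; the transform file records the applied node rewrites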
input:
rules.seqwish.output
output:
gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
threads: 1
params:
app_path=config['app.path']
log:
time="logs/pggb/{chromosome}.gfaffix.time.log"
shell:
"""
/usr/bin/time -v -o {log.time} \
apptainer exec {params.app_path}/pggb.sif gfaffix \
{input} -o {output.gfa} -t {output.transform} \
> /dev/null
# Compressing
apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-@ {threads} -k {output.gfa}
"""
rule odgi_postprocessing:
# Run pggb's post-processing steps (mainly odgi) on the GFAffix graph
input:
rules.gfaffix_on_chr.output.gfa
output:
gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
threads: 8
params:
app_path=config['app.path']
log:
cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
time_build="logs/pggb/{chromosome}.odgi_build.time.log",
cmd_unchop="logs/pggb/{chromosome}.odgi_unchop.cmd.log",
time_unchop="logs/pggb/{chromosome}.odgi_unchop.time.log",
cmd_sort="logs/pggb/{chromosome}.odgi_sort.cmd.log",
time_sort="logs/pggb/{chromosome}.odgi_sort.time.log",
cmd_view="logs/pggb/{chromosome}.odgi_view.cmd.log",
time_view="logs/pggb/{chromosome}.odgi_view.time.log"
shell:
"""
OGfile="$(dirname {input})/$(basename {input} .gfa)"
## Converting to odgi format and optimizing namespace
/usr/bin/time -v -o {log.time_build} \
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
tee {log.cmd_build}
## Unchopping the nodes (merging unitigs into single nodes)
/usr/bin/time -v -o {log.time_unchop} \
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
tee {log.cmd_unchop}
## Sorting the nodes
/usr/bin/time -v -o {log.time_sort} \
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
-i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
tee {log.cmd_sort}
## Exporting to gfa
/usr/bin/time -v -o {log.time_view} \
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
view -i $OGfile.unchoped.sorted.og -g \
1> {output.gfa} \
2> >(tee {log.cmd_view} >&2)
## Removing .og files for space savings
rm $(dirname {input})/*.og
## Adding metadata
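# getTags.py gathers the pangenome metadata; 'sed /^H/r' then inserts those lines right after the GFA header (H) line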
python scripts/getTags.py \
--appdir {params.app_path} --config-file config.yaml \
> "$(dirname {input})/metadata.txt"
sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
# Compressing
# apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
# -@ {threads} -k {output.gfa}
"""
rule generate_graph_list:
# Generate a text file containing all created graphs
input:
expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
output:
"data/chrGraphs/graphsList.txt"
threads: 1
run:
with open(output[0], "w") as handle:
for file in input:
handle.write(file+"\n")
rule graph_squeeze:
# Using odgi squeeze to merge all chromosome subgraphs into the final graph
input:
glist="data/chrGraphs/graphsList.txt",
graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
output:
"output/pan1c."+config['name']+".gfa"
log:
cmd="logs/squeeze/"+config['name']+".squeeze.cmd.log",
time="logs/squeeze/"+config['name']+".squeeze.time.log",
params:
app_path=config['app.path']
shell:
"""
/usr/bin/time -v -o {log.time} \
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
squeeze -t {threads} -O -P -f {input.glist} -o {output}.og 2>&1 | \
tee {log.cmd}
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
view -t {threads} -P -i {output}.og -g > {output}
rm {output}.og
"""
rule graph_stats:
# Using GFAstats to produce stats on every chromosome graph
input:
graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
output:
genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
threads: 4
params:
app_path=config['app.path'],
pan_name=config['name']
shell:
"""
apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
-g {input.graph} -P \
-o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
-t {threads}
"""
rule graph_figs:
# Creating figures using odgi viz
input:
graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
output:
oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
threads: 4
params:
app_path=config['app.path'],
oneDviz=config['odgi.1Dviz.params'],
pcov=config['odgi.pcov.params']
shell:
"""
## 1D viz
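# odgi viz draws one horizontal row per path across the sorted graph positions, using the parameters from config['odgi.1Dviz.params']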
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
## Pcov viz
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
"""
rule aggregate_graphs_stats:
# Reading and merging all stats files from chromosome graphs into a .tsv.
input:
genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
output:
genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
params:
app_path=config['app.path'],
pan_name=config['name']
shell:
"""
apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
--input $(dirname {input[0]}) --outputGeneral {output.genstats} \
--outputPaths {output.pathstats} --panname {params.pan_name}
"""
rule final_graph_tagging:
# Add metadata to the final GFA
input:
graph="output/pan1c."+config['name']+".gfa",
output:
"output/pan1c."+config['name']+".gfa.metadata"
threads: 1
params:
app_path=config['app.path'],
pan_name=config['name']
shell:
"""
python scripts/getTags.py \
--appdir {params.app_path} --config-file config.yaml > {output}
sed -i '/^H/r {output}' {input.graph}
"""
rule pggb_input_stats:
# Produces statistics on pggb input sequences
input:
flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
output:
"output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
params:
app_path=config['app.path'],
pan_name=config['name']
shell:
"""
apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
-f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
"""
rule core_statistics:
# Aggregate chrInput, chrGraph and pggb statistics into a single tsv
input:
chrInputStats = "output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
chrGraphStats = "output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
tsv = "output/stats/pan1c."+config['name']+".core.stats.tsv",
dir = directory("output/pggb.usage.figs")
threads: 1
params:
app_path=config['app.path'],
pan_name=config['name']
shell:
"""
mkdir -p {output.dir}
apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
--pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
--chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
"""
"""
"""
rule get_pav:
# Create a PAV matrix readable by Panache for each chromosome-scale graph
input:
"data/chrGraphs/graphsList.txt"
output:
directory("output/pav.matrices")
threads: 16
params:
app_path=config['app.path']
run:
shell("mkdir {output}")
# Getting the list of graphs
with open(input[0]) as handle:
graphList = [graph.rstrip("\n") for graph in handle.readlines()]
# Iterating over graphs
for graph in graphList:
shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
rule panacus_stats:
# Produces panacus reports for a chromosome graph
input:
graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
output:
html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
log:
cmd="logs/panacus/{chromosome}.panacus.cmd.log",
time="logs/panacus/{chromosome}.panacus.time.log"
params:
app_path=config['app.path'],
pan_name=config['name'],
refname=config['reference']
threads: 8
shell:
"""
/usr/bin/time -v -o {log.time} \
bash scripts/getPanacusHG.sh \
-g {input.graph} \
-r $(basename {params.refname} .fa.gz) \
-d data/chrGraphs/{wildcards.chromosome} \
-o {output.html} \
-a {params.app_path}
"""
rule create_pan1c_report_fig:
# Produces a markdown report figure for each chromosome graph
input:
graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
contigfig="output/chr.contig/{chromosome}.contig.png",
output:
odgifig=temp("tmp/{chromosome}.odgi.png"),
namefig=temp("tmp/{chromosome}.name.png"),
reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
threads: 4
params:
app_path=config['app.path']
shell:
"""
## Odgi 1D viz
apptainer run --app odgi {params.app_path}/PanGeTools.sif \
viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
## Getting legend from contig figure
convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
odgheight=$(identify -ping -format '%h' {output.odgifig})
ctgheight=$(identify -ping -format '%h' {input.contigfig})
combinedheight=$(($odgheight+$ctgheight))
echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
## Creating an empty canvas and adding the other figures to it
convert -size "3300x$combinedheight" xc:white {output.reportfig}
composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
"""
"""
Optional inputs for 'create_pan1c_report', added to the final report depending on the config.
For example:
- SyRI figures on assemblies
"""
sections = dict()
sections["metadata"] = "output/pan1c."+config['name']+".gfa.metadata"
sections["odgifigs"] = expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST)
sections["genstats"] = "output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
sections["pathstats"] = "output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
if config["get_ASMs_SyRI"] == "True":
sections["SyRI_on_ASMs_figs"] = expand(
"output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
haplotype=SAMPLES_NOREF
)
rule create_pan1c_report:
# Produces a markdown report of the chromosome graphs
input:
**sections
output:
report = "output/pan1c."+config['name']+".report.md"
params:
app_path=config['app.path'],
add_SyRI=config['get_ASMs_SyRI']
run:
shell("touch {output.report}")
# Adding Summary (made for importing in Joplin)
shell("echo '# Summary' >> {output.report}")
shell("echo '[TOC]' >> {output.report}")
shell("echo '' >> {output.report}")
# Adding graph construction info
## WIP
# Adding metadata
shell("echo '# Graph metadata' >> {output.report}")
shell("echo '# General stats' >> {output.report}")
shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.report}")
shell("echo '' >> {output.report}")
shell("echo '# Path stats' >> {output.report}")
shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.report}")
shell("echo '' >> {output.report}")
shell("echo '# Chromosome-scale odgi graphs' >> {output.report}")
for fig in fig_list:
basename=os.path.basename(fig)
chr_name=basename.split('.')[1]
shell("echo '## {chr_name}' >> {output.report}")
shell("echo '' >> {output.report}")
shell("echo '' >> {output.report}")
# Adding SyRI figs if produced
if params.add_SyRI == "True":
fig_list = [fig for fig in input.SyRI_on_ASMs_figs]
fig_list.sort()
shell("echo '# SyRI on input assemblies' >> {output.report}")
for fig in fig_list:
basename = os.path.basename(fig)
hap_name = basename.split('.')[2:4]
shell("echo '## {hap_name[0]}, {hap_name[1]}' >> {output.report}")
shell("echo '' >> {output.report}")
shell("echo '' >> {output.report}")