Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- [#896](https://github.com/nf-core/mag/pull/896) - Remove obsolete execution command from README (by @dialvarezs)
- [#907](https://github.com/nf-core/mag/pull/907) - Include refined bins from all binners in the `DASTool/bins` output folder (by @AlexHoratio)
- [#911](https://github.com/nf-core/mag/pull/911) - Ensure column order is consistent when generating depth summaries to prevent swapped results in the merged depth summary (by @dialvarezs)

### `Dependencies`

Expand Down
107 changes: 47 additions & 60 deletions bin/get_mag_depths.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
#!/usr/bin/env python

## Originally written by Sabrina Krakau and released under the MIT license.
## Originally written by Sabrina Krakau and updated by Diego Alvarez. Released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

import sys
import argparse
import os.path
import pandas as pd
import csv
import gzip
import os.path
import statistics
import sys

from Bio import SeqIO

Expand All @@ -29,17 +28,14 @@ def parse_args(args=None):
"--depths",
required=True,
metavar="FILE",
help="(Compressed) TSV file containing contig depths for each sample: contigName, contigLen, totalAvgDepth, sample1_avgDepth, sample1_var [, sample2_avgDepth, sample2_var, ...].",
)
parser.add_argument(
"-a", "--assembler", required=True, type=str, help="Assembler name."
)
parser.add_argument(
"-i", "--id", required=True, type=str, help="Sample or group id."
)
parser.add_argument(
"-m", "--binner", required=True, type=str, help="Binning method."
help=(
"(Compressed) TSV file containing contig depths for each sample: contigName, contigLen, "
+ "totalAvgDepth, sample1_avgDepth, sample1_var [, sample2_avgDepth, sample2_var, ...]."
),
)
parser.add_argument("-a", "--assembler", required=True, type=str, help="Assembler name.")
parser.add_argument("-i", "--id", required=True, type=str, help="Sample or group id.")
parser.add_argument("-m", "--binner", required=True, type=str, help="Binning method.")
return parser.parse_args(args)


Expand All @@ -53,57 +49,48 @@ def main(args=None):
sample_names = []
dict_contig_depths = {}
with gzip.open(args.depths, "rt") as infile:
reader = csv.reader(infile, delimiter="\t")
# process header
header = next(reader)
for sample in range(int((len(header) - 3) / 2)):
col_name = header[3 + 2 * sample]
reader = csv.DictReader(infile, delimiter="\t")
# process header to extract sample names from column names
depth_columns = []
for col_name in reader.fieldnames[3::2]: # Every 2nd column starting from index 3
# retrieve sample name: "<assembler>-<id>-<other sample_name>.bam"
sample_name = col_name[len(args.assembler) + 1 + len(args.id) + 1 : -4]
sample_names.append(sample_name)
depth_columns.append(col_name)
# process contig depths
for row in reader:
contig_depths = []
for sample in range(int((len(row) - 3) / 2)):
contig_depths.append(float(row[3 + 2 * sample]))
dict_contig_depths[str(row[0])] = contig_depths

# Initialize output files
n_samples = len(sample_names)
with open(
args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "w"
) as outfile:
print("bin", "\t".join(sample_names), sep="\t", file=outfile)

# for each bin, access contig depths and compute mean bin depth (for all samples)
for file in args.bins:
all_depths = [[] for i in range(n_samples)]

if file.endswith(".gz"):
with gzip.open(file, "rt") as infile:
for rec in SeqIO.parse(infile, "fasta"):
contig_depths = dict_contig_depths[rec.id]
for sample in range(n_samples):
all_depths[sample].append(contig_depths[sample])
else:
with open(file, "rt") as infile:
for rec in SeqIO.parse(infile, "fasta"):
contig_depths = dict_contig_depths[rec.id]
for sample in range(n_samples):
all_depths[sample].append(contig_depths[sample])

binname = os.path.basename(file)
with open(
args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "a"
) as outfile:
print(
binname,
"\t".join(
str(statistics.median(sample_depths))
for sample_depths in all_depths
),
sep="\t",
file=outfile,
contig_depths = {}
for sample_name, col_name in zip(sample_names, depth_columns):
contig_depths[sample_name] = float(row[col_name])
dict_contig_depths[row[reader.fieldnames[0]]] = contig_depths

sample_names = sorted(sample_names)

with open(f"{args.assembler}-{args.binner}-{args.id}-binDepths.tsv", "w") as outfile:
writer = csv.writer(outfile, delimiter="\t")
writer.writerow(["bin", *sample_names])

# for each bin, access contig depths and compute mean bin depth (for all samples)
for file in args.bins:
all_depths = {sample: [] for sample in sample_names}

if file.endswith(".gz"):
with gzip.open(file, "rt") as infile:
for rec in SeqIO.parse(infile, "fasta"):
contig_depths = dict_contig_depths[rec.id]
for sample in sample_names:
all_depths[sample].append(contig_depths[sample])
else:
with open(file, "rt") as infile:
for rec in SeqIO.parse(infile, "fasta"):
contig_depths = dict_contig_depths[rec.id]
for sample in sample_names:
all_depths[sample].append(contig_depths[sample])

binname = os.path.basename(file)

writer.writerow(
[binname, *[statistics.median(all_depths[sample]) for sample in sample_names]],
)


Expand Down