Skip to content

Instantly share code, notes, and snippets.

@marcelm
Created March 7, 2023 09:42

Revisions

  1. marcelm created this gist Mar 7, 2023.
    38 changes: 38 additions & 0 deletions qualtrim.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,38 @@
    #!/usr/bin/env python3
    """Quality trimming using a running sum from the 5' to 3' end"""

    import sys
    from argparse import ArgumentParser
    import dnaio


    def qual_trim_index(qualities_ascii, threshold):
    qualities = [ord(c) - 33 for c in qualities_ascii]
    score = 0
    best_end = 0
    best_score = 0
    for i, q in enumerate(qualities):
    score += q - threshold
    if score > best_score:
    best_end = i + 1
    best_score = score

    return best_end


    def main():
    parser = ArgumentParser()
    parser.add_argument("-q", dest="threshold", metavar="THRESHOLD", type=int, default=30, help="Quality trimming threshold")
    parser.add_argument("fastq")
    args = parser.parse_args()
    threshold = args.threshold
    with dnaio.open(args.fastq) as infile:
    with dnaio.open(sys.stdout.buffer, mode="w", qualities=True) as outfile:
    for record in infile:
    end = qual_trim_index(record.qualities, threshold)
    record = record[:end]
    outfile.write(record)


    if __name__ == "__main__":
    main()