import sys #GC_content: Loop through the sequence array, calculate the gc values, output the values def GC_content(fasta_file): #Loop through the file and create a sequence array sequences = {} with open(fasta_file, "r") as file: name = "" for line in file: line = line.strip() if line.startswith(">"): name_sub1 = line[1:] name_sub2 = name_sub1.split(" ") name = name_sub2[0] sequences[name] = "" else: sequences[name] += line #Calculate the gc values, output the values print("Gene stable ID" + "\t" + "Transcript stable ID" + "\t" + "Gene % GC content" + "\t" + "GC_ratio" + "\t" + "AT_ratio") for transcript, sequence in sequences.items(): gc_count = sequence.count("G") + sequence.count("C") at_count = sequence.count("A") + sequence.count("T") gc_ratio = gc_count / at_count at_ratio = at_count / gc_count gc_content = (100 * gc_count) / (gc_count + at_count) gc_content_str = "{:.2f}".format(gc_content) parsed_string = transcript.split("_") gene_model = parsed_string[0] print(gene_model + "\t" + transcript + "\t" + gc_content_str + "\t" + str(gc_ratio) + "\t" + str(at_ratio)) #Call the GC_content function GC_content(sys.argv[1])