#the purpose of this script is to take in a Jpred input and to output only the peptide corresponding to the first alpha helix
#each input record is three lines - a fasta header line starting with ">", then the amino acid sequence, and then the secondary structure prediction

def helix(protein_sequence, jpred):
    """Return the residues of the first predicted alpha helix.

    The two strings are walked in lockstep. The result is the run of
    residues from ``protein_sequence`` whose matching character in ``jpred``
    is "H", starting at the first "H" and ending just before the next
    character that is not "H" (or at the end of the shorter string).

    Args:
        protein_sequence: amino acid sequence, one character per residue.
        jpred: secondary structure prediction, one character per residue;
            "H" marks a helix position.

    Returns:
        The peptide of the first helix, or "" if no position is marked "H".
    """
    helix_residues = []    #residues of the first helix collected so far

    # zip stops at the shorter string, so a helix that runs to the very last
    # residue (or a prediction shorter than the sequence) cannot index past
    # the end the way a look-ahead at position + 1 would.
    for residue, prediction in zip(protein_sequence, jpred):
        if prediction == "H":
            helix_residues.append(residue)
        elif helix_residues:
            # First non-helix position after the helix started: done.
            break

    return "".join(helix_residues)

#end function

import sys
# Command-line driver: read Jpred records from the input file and write the
# first predicted alpha helix of each record to the output file.
try:
    in_path, out_path = sys.argv[1], sys.argv[2]
except IndexError:
    sys.exit("Usage: firsthelix <input_file.txt> <output_file.txt>")

# "with" closes both files even if processing stops part-way through.
with open(in_path, "rt") as in_file, open(out_path, "wt") as out_file:
    # Strip only the line terminator, so a final line without a trailing
    # newline is treated like every other line and an empty input file simply
    # yields no records.
    lines = [line.rstrip("\n") for line in in_file]

    for i, line in enumerate(lines):
        if not line.startswith(">"):
            continue    #skip ahead to next line until you find the fasta header

        # A record needs the sequence and prediction lines after its header.
        if i + 2 >= len(lines):
            sys.exit("Incomplete record (missing sequence or prediction line) for header: " + line)

        first_helix = helix(lines[i + 1], lines[i + 2])    #computed once, used for both outputs
        print("the first predicted alpha helix sequence is: ", first_helix)
        out_file.write(line + "\n" + first_helix + "\n\n")

