#this script takes the emails that jpred sent and merges them into something more useful - that is, a fasta-style file holding each query sequence together with its secondary predictions (fast2?)

import sys

# Check the command line before touching the filesystem, so that a missing
# argument always produces the usage message and the input file is never
# opened (and left open) when the output path was not supplied.
try:
    input_path, output_path = sys.argv[1], sys.argv[2]
except IndexError:
    sys.exit("Usage: jpred0.2.2.py <input_file> <output_file>")

# Both handles are used by the rest of the script and closed at its end.
in_file = open(input_path, "rt")
out_file = open(output_path, "wt")

def concatenate(file_lines, start_line, end_line):
    """Join the "Query" rows and the "Jpred" rows found in a range of lines.

    file_lines is the list of lines of the file
    start_line is the first index to examine (normally the line that opens the block)
    end_line is the index to stop before; the line at end_line itself is not examined

    Returns the joined query text, a newline, the joined jpred text and two
    newlines. Raises IndexError if the range runs past the end of file_lines.
    """
    query_parts = []
    jpred_parts = []
    for index in range(start_line, end_line):
        row = file_lines[index]
        # the kept text is the row minus its first 11 and its last 2 characters
        if row.startswith("Query"):
            query_parts.append(row[11:-2])
        elif row.startswith("Jpred"):
            jpred_parts.append(row[11:-2])
        # any other row ("Conf:" rows included) contributes nothing

    return "".join(query_parts) + "\n" + "".join(jpred_parts) + "\n\n"
    
    
#the whole input is loaded as a list of lines (line endings kept) so that the code below can work out which line indices each block of sequence information occupies

lines = list(in_file)
file_length = len(lines)

#finds the block of text: from its "Subject" line up to 1 less than the returned end line
def findblock(start_line, file_length, lines):
    """Locate the end of the first "Subject" block at or after start_line.

    A block opens with a line that starts with "Subject" and runs until the
    next such line or the end of the file.

    start_line is the index to start looking from, normally the block's own "Subject" line
    file_length is the number of entries in lines
    lines is the list of lines of the file

    Returns the index one past the block's last line: the index of the next
    "Subject" line, or file_length when the block runs to the end of the file
    (also file_length when no "Subject" line is found at all).
    """
    i = start_line
    end_line = 1  # offset from the header at i to the candidate end of the block
    while i + end_line < file_length:
        if not lines[i].startswith("Subject"):
            # still before the block: slide forward looking for its header
            i += 1
        elif lines[i + end_line].startswith("Subject"):
            # next header reached, the block ends just before it
            break
        else:
            end_line += 1

    # Measure from the header actually found (i), not from start_line: the two
    # differ whenever lines had to be skipped to reach the header, and using
    # start_line there would report an end index that is too small.
    return i + end_line

#walk the file block by block: every "Subject" line opens a block that is written out as one record
start_line = 0
while start_line < file_length:
    header = lines[start_line]
    if not header.startswith("Subject"):
        #not the start of a block, keep scanning
        start_line += 1
        continue

    end_line = findblock(start_line, file_length, lines)
    #the record name is the header line minus its first 19 and its last 12 characters
    name = ">" + header[19:-12] + "\n"
    sequence = concatenate(lines, start_line, end_line)
    output = name + sequence
    #each record is echoed to the screen as well as written to the output file
    print(output)
    out_file.write(output)
    #carry on from the line where this block ended
    start_line = end_line
    
#report how many sequences there are: one per line starting with "Subject"
sequences = sum(1 for entry in lines if entry.startswith("Subject"))
print("Number of sequences organized: ", sequences)

in_file.close()
out_file.close()