#!/usr/local/bin/python3

#this program calculates the hydrophobic moment, or amphipathicity of an alpha helix
#this is an updated version of the program that can take in arguments from the command line
#the usage will be $ <program_name> <input_file> <output_file>
#sys.argv[0] is program_name, sys.argv[1] is <input_file>, and sys.argv[2] is <output_file>


def amphipathicity(peptide, name):
    """Return the hydrophobic moment of a peptide as a tab-delimited text field.

    Each residue at 1-based position n contributes its hydrophobicity H as a
    vector at angle n * 100 degrees; the result is the length of the summed
    vector divided by the number of residues:

        sqrt((sum H*sin(n*100deg))**2 + (sum H*cos(n*100deg))**2) / N

    Args:
        peptide: one-letter amino-acid sequence; case-insensitive. '-'
            characters are treated as alignment gaps and removed.
        name: label of the sequence. Not used in the calculation; kept so
            existing callers keep working.

    Returns:
        A string meant to fill the "amphipathicity" and "notes" columns:
        * the moment as text, followed by a "gaps removed" note if any '-'
          was stripped;
        * "N/A" plus a note listing the offending characters when the
          sequence contains anything other than the 20 residues or '-';
        * "N/A" plus a note when no residues remain (empty or gap-only
          input), instead of failing with a division by zero.
    """
    import math

    #hydrophobicity scale, according to Fauchere and Pliska 1983
    hydropdict = {
        'A': 0.310,
        'D': -0.770,
        'E': -0.640,
        'I': 1.800,
        'M': 1.230,
        'S': -0.040,
        'Y': 0.960,
        'R': -1.010,
        'C': 1.540,
        'G': 0.000,
        'L': 1.700,
        'F': 1.790,
        'T': 0.260,
        'V': 1.220,
        'N': -0.600,
        'Q': -0.220,
        'H': 0.130,
        'K': -0.990,
        'P': 0.720,
        'W': 2.250,
    }

    peptide = peptide.upper()   #the scale is keyed by uppercase letters

    #anything that is neither a known residue nor a gap makes the sequence unusable;
    #the dictionary keys are the single source of truth for valid residues
    invalid = [letter for letter in peptide
               if letter != "-" and letter not in hydropdict]
    if invalid:
        chars = "".join(letter + ", " for letter in invalid)
        return "N/A \t invalid character(s) found in sequence: " + chars

    gaps = ""                   #note appended to the result when gaps were stripped
    if "-" in peptide:
        peptide = peptide.replace("-", "")
        gaps = "\t gaps removed, new peptide: " + peptide

    length = len(peptide)
    if length == 0:
        #nothing to average over; avoid dividing by zero below
        return "N/A \t sequence contains no residues"

    step = math.radians(100)    #angular offset between consecutive residues
    i = 0
    j = 0
    #plain running sums (not sum()) so the floating-point result stays the
    #same regardless of how the interpreter implements sum() for floats
    for position, residue in enumerate(peptide, start=1):
        hydrophobicity = hydropdict[residue]
        i += hydrophobicity * math.sin(position * step)
        j += hydrophobicity * math.cos(position * step)

    amphipathic_moment = ((i ** 2 + j ** 2) ** 0.5) / length

    return str(amphipathic_moment) + gaps

#************end function************

import sys

#Driver: read FASTA-style records (a '>' header line followed by one sequence
#line) from the input file and write one tab-delimited row per record.

try:    #makes sure that both arguments were given on the command line
    in_path = sys.argv[1]
    out_path = sys.argv[2]
except IndexError:
    sys.exit("Usage: amphihelix_0.2.1.py <input_file.txt> <output_file.txt>")

#load every line without its trailing newline; stripping only "\n" keeps the
#last residue intact even when the file does not end with a newline, and an
#empty file simply yields an empty list
with open(in_path, "rt") as in_file:
    lines = [line.rstrip("\n") for line in in_file]

with open(out_path, "wt") as out_file:
    out_file.write("name \t peptide \t amphipathiticy \t notes\n")       #header of the tab-delimited output file

    for index, header in enumerate(lines):
        if not header.startswith(">"):                      #only FASTA header lines start a record
            continue

        #the sequence is the line right after the header; a header on the last
        #line (or one followed by a blank line) has no sequence to analyse
        peptide = lines[index + 1] if index + 1 < len(lines) else ""
        if peptide:
            result = amphipathicity(peptide, header)        #computed once, reused for screen and file
        else:
            result = "N/A \t no sequence found after header"

        print("the amphipathicity of peptide ", peptide, "from sample ", header, "is:")
        print(result)
        #the output row drops the leading '>' from the header
        out_file.write(header[1:] + "\t" + peptide + "\t" + result + "\n")

print("")
print("*****************************************************************")
print("         Thanks for using amphihelix.py, version 0.2.2")
print("This script was written by Onur Erbilgin while in the Kerfeld Lab")
print("                   http://www.kerfeldlab.org")
print("*****************************************************************")