Сделать алгоритм Смита ватерманна работать быстрее


####make the BLOSUM matrix into a class for easier retrieval
class BLOSUM62():
    def __init__(self):
        with open('BLOSUM62.txt') as f1:
            entries = [(line.strip()).split() for line in f1.readlines()]
            self.scoring_matrix = {(entry[0],entry[1]):int(entry[2]) for entry in entries}

    def __getitem__(self, pair):
        return self.scoring_matrix[pair[0],pair[1]]

def local_alignment(v,w,scoring_matrix,gap_start, gap_extend):

    ###initialize the 3 matrices now 
    M = [[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)]
    X = [[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)]
    Y = [[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)]
    back_track = [[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)]


    ###initialize the maximum scores
    max_score = -1
    max_i, max_j = 0,0


    ###populate all three matrices 
    for i in xrange(1,(len(v)+1)):
        for j in xrange(1, (len(w)+1)):

            Y[i][j] = max([Y[i-1][j] - gap_extend, M[i-1][j] - gap_start])
            X[i][j] = max([X[i][j-1] - gap_extend, M[i][j-1] - gap_start])
            cur_scores = [Y[i][j], M[i-1][j-1]+scoring_matrix[v[i-1],w[j-1]], X[i][j],0]

            M[i][j] = max(cur_scores)
            back_track[i][j] = cur_scores.index(M[i][j])
            # print 'chose back_track value as'
            # print back_track[i][j]

            if M[i][j] > max_score:
                max_score = M[i][j]
                max_i, max_j = i, j

    print 'Finished making the matrix'
    # print 'M'
    # print M
    # print 'backtrack'
    # print back_track
    # Initialize the indices to start at the position of the high score.
    i, j = max_i, max_j

    # Initialize the aligned strings as the input strings up to the position of the high score.
    v_aligned, w_aligned = v[:i], w[:j]

    # Backtrack to start of the local alignment starting at the highest scoring cell.
    #while back_track[i][j] != 3 and i*j != 0 and i<=j:
    while back_track[i][j] != 3 and i*j != 0 and i>=j:
        if back_track[i][j] == 0:
            i -= 1
        elif back_track[i][j] == 1:
            i -= 1
            j -= 1
        elif back_track[i][j] == 2:
            j -= 1


    print 'finished backtracking'
    # Cut the strings at the ending point of the backtrack.
    v_aligned = v_aligned[i:]
    w_aligned = w_aligned[j:]


    return str(max_score), v_aligned, w_aligned


###now run the thing
#word1 = 'PLEASANTLY'
#word2 = 'MEANLY'

f1 = open('rosalind_laff.txt','r')
indata = f1.readlines()
f1.close()

###add '>' to last line 
indata.append('>')

word1 = ''
word2 = ''
linenum = 0
chunk = []
for line in indata:
    line = line.strip()
    linenum = linenum+1
    if line=='':
        pass
    else:
        if line[0]=='>' :
            if linenum==1:
                chunk = []
            elif linenum>1 and linenum!=len(indata):
                for i in range(0,len(chunk)):
                    word1 = word1+chunk[i]                  
                chunk = []

            else:
                for i in range(0,len(chunk)):
                    word2 = word2+chunk[i]

        else:
            chunk.append(line)

# word1 = 'PLEASANTLY'
# word2 = 'MEANLY'  

# Get the local alignment (given sigma = 11, epsilon = 1 in problem statement).
alignment = local_alignment(word1, word2, BLOSUM62(), 11, 1)

# Print and save the answer.
print '\n'.join(alignment)

f1 = open('out_localali_test.txt','w')
f1.write(str(alignment))
f1.close()

Это рабочий код алгоритма Смита ватерманна, но это слишком медленно, чтобы запустить.Любые предложения, чтобы ускорить его? Основная часть-это запудрены часть его, как его рекурсивным.

То, что отступать не- откаты выглядит в матрице начиная с I,J, который сейчас показатели максимальный балл вычисляется itearting как я-1, ю-1 пока они не найдут bbackstarck[я][Дж]==3 мы проверяем стоимость запудрены[я][Дж[ рекурсивно



120
2
задан 14 февраля 2018 в 08:02 Источник Поделиться
Комментарии