-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtask.py
More file actions
93 lines (67 loc) · 2.99 KB
/
task.py
File metadata and controls
93 lines (67 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import re
from itertools import islice
# Number of consecutive positions evaluated together as one candidate region.
region_length = 20

# Load the whole input file as a list of lines (outer whitespace trimmed first).
with open("example.sub") as f:
    c = f.read().strip().split("\n")

# The strand is read from the second line, keeping alphabetic characters only.
strand = "".join(ch for ch in c[1] if ch.isalpha())

# Reduce every line, in place, to its "(", ")" and "." characters.
for i, line in enumerate(c):
    c[i] = re.sub(r'[^\(\)\.]', "", line)

# Discard the first two lines and then the last one; the rest are the rows.
del c[:2]
del c[-1]

row_length = len(c[0]) - 1  # should be the same for every row
column_scores = []
def mean(t):
    """Return the sum of ``t`` divided by the module-level ``region_length``.

    The divisor is always ``region_length``, never ``len(t)``, so a sequence
    holding fewer than ``region_length`` items is not averaged over its own
    length.
    """
    total = sum(t)
    return total / region_length
def get_melting_temperature(seq):
    """Estimate the melting temperature of a nucleotide sequence.

    Letter case is ignored, so ``"augc"`` and ``"AUGC"`` give the same
    result.  Only A, U, G and C are counted; any other character contributes
    nothing to the counts.

    Sequences longer than 14 characters use
    ``64.9 + 41 * (G + C - 16.4) / (A + U + G + C)``; all others use
    ``2 * (A + U) + 4 * (G + C)``.

    Args:
        seq: Nucleotide string.

    Returns:
        The estimate rounded to two decimal places.

    Raises:
        ZeroDivisionError: If ``seq`` is longer than 14 characters and
            contains none of A, U, G or C.
    """
    # Count on an upper-cased copy so that lowercase input is not silently
    # scored as if it contained no nucleotides at all.
    bases = seq.upper()
    weak = bases.count("A") + bases.count("U")
    strong = bases.count("G") + bases.count("C")

    # The formula is selected by the length of the sequence as passed in.
    if len(seq) > 14:
        return round(64.9 + 41 * (strong - 16.4) / (weak + strong), 2)
    return round(weak * 2 + strong * 4, 2)
def get_complementary_dna(seq):
    """Return ``seq`` with every base replaced by its pairing partner.

    A and U are swapped with each other, as are G and C; the order of the
    bases is kept.  A character outside A/U/G/C raises ``KeyError``.

    NOTE(review): the result uses U rather than T despite the function
    name -- confirm which alphabet is intended.
    """
    pairing = {"A": "U", "U": "A", "G": "C", "C": "G"}
    return "".join(map(pairing.__getitem__, seq))
def normalize_temp(temp, min, max):
    """Rescale ``temp`` linearly so that ``min`` maps to 0 and ``max`` to 1.

    The parameter names hide the ``min``/``max`` builtins inside this
    function only.  Raises ``ZeroDivisionError`` when ``min`` equals ``max``.
    """
    span = max - min
    offset = temp - min
    return offset / span
# For every column, store the fraction of rows whose character there is ".".
for i in range(row_length):
    dot_count = sum(1 for row in c if row[i] == ".")
    column_scores.append(dot_count / len(c))
# Map each start position to mean() of the column scores in the window of up
# to region_length entries that begins there.
average_values = {}
for i in range(row_length):
    window = column_scores[i:i + region_length]
    if not window:
        # Nothing left to score from this position onwards.
        break
    average_values[i] = mean(window)
# Start position whose window has the highest average score (the first one
# wins on ties, because max() keeps the earliest maximum it encounters).
index = max(average_values, key=average_values.get)
# The highest average value itself.
highest_average = average_values[index]
# Cut the winning window out of the strand.
final_strand = strand[index:index + region_length]
print(f"Best region: {final_strand}\tstarting from position: {index + 1}")
print(f"Melting temperature: {get_melting_temperature(final_strand)}°C")
# get_complementary_dna() already returns a str, so no join is needed.
# Dropping it also removes the double quotes nested inside a double-quoted
# f-string, which is a SyntaxError on Python versions before 3.12.
print(f"Complementary sequence: {get_complementary_dna(final_strand)}")
print("\n\n\ntask 2: taking the melting temperature into account and calculating the score (might not work properly)")

# Melting temperature of the window starting at every strand position
# (key: start position, value: temperature).  Windows that start near the
# end of the strand are shorter than region_length.
melting_temperatures = {
    i: get_melting_temperature(strand[i:i + region_length])
    for i in range(len(strand))
}

# Rescale the temperatures to the 0..1 range so they can be combined with the
# average column scores.
min_temp = min(melting_temperatures.values())
max_temp = max(melting_temperatures.values())
temp_span = max_temp - min_temp
melting_temperatures = {
    # When every window has the same temperature there is nothing to rank on,
    # so use a constant instead of dividing by zero.
    start: normalize_temp(temp, min_temp, max_temp) if temp_span else 0.0
    for start, temp in melting_temperatures.items()
}

# Weighted score per start position: 30% average column score, 70% normalized
# melting temperature.  The dict is keyed by start position, so it is indexed
# directly rather than rebuilding a list of its values on every iteration.
score = {}
for i in range(row_length):
    score[i] = round(0.3 * average_values[i] + 0.7 * melting_temperatures[i], 7)

# Order the start positions by score, best first.
sorted_score = dict(sorted(score.items(), key=lambda item: item[1], reverse=True))
print("key - strand starting position; value - score")
print(list(islice(sorted_score.items(), 5)))  # top 5 scores