答案
第二十六题
Introduction to Random Strings
代码块
import math
def log_probabilities(dna, gc_contents):
    """Return the log10 probability of ``dna`` for each GC-content.

    Under a GC-content ``x`` the symbols 'C' and 'G' each have probability
    ``x / 2`` and every other symbol has probability ``(1 - x) / 2``.  The
    score of the string is the sum of the log10 probabilities of its symbols.

    Args:
        dna: The string to score.
        gc_contents: Iterable of GC-content values (floats).

    Returns:
        A list with one score per GC-content, each rounded to 3 decimals.

    Raises:
        ValueError: From ``math.log10`` when a symbol that occurs in ``dna``
            has a probability that is zero or negative.
    """
    scores = []
    for gc in gc_contents:
        total = sum(
            math.log10(gc / 2 if base in 'CG' else (1 - gc) / 2)
            for base in dna
        )
        scores.append(round(total, 3))
    return scores


if __name__ == "__main__":
    with open('D:/py_work/play/file/rosalind_prob.txt') as f:
        tokens = f.read().split()
    # The first token is the sequence; the remaining ones are GC-contents.
    result = log_probabilities(tokens[0], [float(t) for t in tokens[1:]])
    final = ' '.join(str(value) for value in result)
    print(final)
第二十七题
Enumerating Oriented Gene Orderings
代码块
from itertools import permutations ,product
def all_num(n):
    """Return the product 2 * 4 * ... * 2n.

    This equals ``2**n * n!``, the number of signed permutations of length
    ``n``.  For ``n <= 0`` the product is empty and 1 is returned.

    Args:
        n: Number of elements being permuted.

    Returns:
        The product as an int.
    """
    total = 1
    for even in range(2, 2 * n + 1, 2):
        total *= even
    return total
def seq(n):
    """Print every signed permutation of 1..n, one per line.

    Each ordering of the numbers 1..n is combined with every choice of sign
    for each position.  A line consists of the numbers, each followed by a
    single space, and ends with a tab character before the newline.

    Args:
        n: Length of the permutations.

    Returns:
        None; the orderings are written to standard output.
    """
    # Each element is available in its positive and its negative form.
    pairs = [(i, -i) for i in range(1, n + 1)]
    # 0 picks the positive form, 1 the negative one; the set of choices is
    # the same for every ordering, so it is built only once.
    sign_choices = list(product(range(2), repeat=n))
    for ordering in permutations(pairs, n):
        for choice in sign_choices:
            line = ''.join(
                str(pair[pick]) + ' ' for pair, pick in zip(ordering, choice)
            )
            print(line + '\t')
# Read n from the first whitespace-separated token of the input file.
with open('D:/py_work/play/file/rosalind_sign.txt') as f:
    n = int(f.read().split()[0])
print(all_num(n))
# seq() prints the orderings itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
seq(n)
第二十八题
Finding a Spliced Motif
代码块
import re
def parse_fasta(lines):
    """Return the sequences of the FASTA records in ``lines``, in order.

    A line starting with '>' opens a new record; the following lines are
    joined (without surrounding whitespace) into that record's sequence.
    Blank lines and any text before the first header are ignored.

    Args:
        lines: Iterable of text lines, e.g. an open file.

    Returns:
        A list of sequence strings, one per record.
    """
    records = []
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith('>'):
            records.append([])
        elif records:
            records[-1].append(line)
    return [''.join(parts) for parts in records]


def spliced_motif_indices(s, t):
    """Return 1-based positions in ``s`` that spell ``t`` as a subsequence.

    The search is greedy: each symbol of ``t`` is matched at its first
    occurrence after the previous match.  A symbol with no remaining
    occurrence is skipped and the search continues with the next symbol.

    Args:
        s: The string that is searched.
        t: The motif whose symbols are located in ``s``.

    Returns:
        A list of 1-based indices in increasing order.
    """
    indices = []
    begin = 0
    for symbol in t:
        found = s.find(symbol, begin)
        if found == -1:
            continue
        indices.append(found + 1)
        begin = found + 1
    return indices


if __name__ == "__main__":
    with open('D:/py_work/play/file/rosalind_sseq.txt') as ds:
        str_list = parse_fasta(ds)
    # First record is the searched string, second record is the motif.
    for position in spliced_motif_indices(str_list[0], str_list[1]):
        print(position, end=' ')
第二十九题
Transitions and Transversions
代码块
import re
with open('D:/py_work/play/file/rosalind_tran.txt') as f:
    # Remove all whitespace so a sequence wrapped over several lines
    # becomes one uninterrupted run of characters.
    s = ''.join(f.read().split())
# Every maximal run of A/T/C/G is taken as one sequence; x[0] and x[1] are
# the two strings compared below.
# NOTE(review): this relies on the header lines containing none of the
# upper-case letters A, T, C, G - confirm for other inputs.
x = re.findall(r'[ATCG]+', s)
def tra(x):
    """Return ``x`` with every base replaced by its transition partner.

    The mapping is A <-> G and C <-> T; any other character is copied
    unchanged.

    Args:
        x: The string to convert.

    Returns:
        The converted string, of the same length as ``x``.
    """
    return x.translate(str.maketrans('ACGT', 'GTAC'))
def transition_transversion_ratio(s1, s2):
    """Return the transition/transversion ratio between two strings.

    Positions are compared pairwise.  Equal symbols are ignored, a mismatch
    of the form A<->G or C<->T counts as a transition, and every other
    mismatch counts as a transversion.

    Args:
        s1: First DNA string.
        s2: Second DNA string, of the same length as ``s1``.

    Returns:
        Number of transitions divided by number of transversions (a float).

    Raises:
        ValueError: If the strings differ in length.
        ZeroDivisionError: If there is no transversion.
    """
    if len(s1) != len(s2):
        raise ValueError("sequences must have the same length")
    transition_pairs = {('A', 'G'), ('G', 'A'), ('C', 'T'), ('T', 'C')}
    transitions = 0
    transversions = 0
    for a, b in zip(s1, s2):
        if a == b:
            continue
        if (a, b) in transition_pairs:
            transitions += 1
        else:
            transversions += 1
    return transitions / transversions


if __name__ == "__main__":
    print(transition_transversion_ratio(x[0], x[1]))
第三十题
k-Mer Composition
代码块
import re
def fuction(n, k=4):
    """Return every k-mer over A, C, G, T in lexicographic order.

    Args:
        n: Unused.  Kept so that existing calls such as ``fuction(1)``
            continue to work and still yield the 4-mers.
        k: Length of the k-mers; defaults to 4, the length that used to be
            hard-coded.

    Returns:
        A list of ``4 ** k`` strings, from 'A' * k to 'T' * k.
    """
    from itertools import product

    return [''.join(letters) for letters in product('ACGT', repeat=k)]
with open('D:/py_work/play/file/rosalind_kmer.txt') as f:
    # Join the whole file into one string with the line endings removed.
    s = ''.join(line.rstrip() for line in f)
# The first run of A/C/G/T characters is used as the DNA string; despite
# its name, `length` holds the sequence itself, not its length.
# NOTE(review): this relies on the header line containing none of the
# upper-case letters A, C, G, T - confirm for other inputs.
length = re.findall(r'[ACGT]+', s)[0]
#4个相邻碱基为一组k-mer
def consume(length):
    """Count how often each 4-mer occurs in a DNA string.

    Args:
        length: The DNA string to scan (the parameter holds the sequence,
            not a number).

    Returns:
        A list of 256 counts ordered like the 4-mers returned by
        ``fuction(1)``, i.e. AAAA, AAAC, AAAG, ..., TTTT.  Overlapping
        occurrences are counted; a string shorter than 4 gives all zeros.

    Raises:
        KeyError: If a window of four characters is not one of the 4-mers
            returned by ``fuction(1)``.
    """
    result = [0] * 4 ** 4
    # Position of every 4-mer in the result list.
    index_of = {kmer: idx for idx, kmer in enumerate(fuction(1))}
    # Slide a window of width 4 over the string, one step at a time.
    for start in range(len(length) - 3):
        result[index_of[length[start:start + 4]]] += 1
    return result
# Count the 4-mers of the sequence and print the counts on one line,
# separated by single spaces.
result = consume(length)
export = ' '.join(map(str, result))
print(export)
https://rosalind.info/problems/
https://www.bilibili.com/read/readlist/rl58094?spm_id_from=333.999.0.0
https://www.zhihu.com/people/shan-shang-fa-shen-jing-93/posts