RNAlib-2.4.14
Python Examples

MFE Prediction (flat interface)

1 import RNA
2 
3 # The RNA sequence
4 seq = "GAGUAGUGGAACCAGGCUAUGUUUGUGACUCGCAGACUAACA"
5 
6 # compute minimum free energy (MFE) and corresponding structure
7 (ss, mfe) = RNA.fold(seq)
8 
9 # print output
10 print "%s\n%s [ %6.2f ]" % (seq, ss, mfe)

MFE Prediction (object oriented interface)

1 import RNA;
2 
3 sequence = "CGCAGGGAUACCCGCG"
4 
5 # create new fold_compound object
6 fc = RNA.fold_compound(sequence)
7 
8 # compute minimum free energy (mfe) and corresponding structure
9 (ss, mfe) = fc.mfe()
10 
11 # print output
12 print "%s [ %6.2f ]" % (ss, mfe)

Suboptimal Structure Prediction

1 import RNA
2 
3 sequence = "GGGGAAAACCCC"
4 
5 # Set global switch for unique ML decomposition
6 RNA.cvar.uniq_ML = 1
7 
8 subopt_data = { 'counter' : 1, 'sequence' : sequence }
9 
10 # Print a subopt result as FASTA record
11 def print_subopt_result(structure, energy, data):
12  if not structure == None:
13  print ">subopt %d" % data['counter']
14  print "%s" % data['sequence']
15  print "%s [%6.2f]" % (structure, energy)
16  # increase structure counter
17  data['counter'] = data['counter'] + 1
18 
19 # Create a 'fold_compound' for our sequence
20 a = RNA.fold_compound(sequence)
21 
22 # Enumerate all structures 500 dacal/mol = 5 kcal/mol arround
23 # the MFE and print each structure using the function above
24 a.subopt_cb(500, print_subopt_result, subopt_data);

Boltzmann Sampling (a.k.a. Probabilistic Backtracing)

1 import RNA
2 
3 sequence = "UGGGAAUAGUCUCUUCCGAGUCUCGCGGGCGACGGGCGAUCUUCGAAAGUGGAAUCCGUACUUAUACCGCCUGUGCGGACUACUAUCCUGACCACAUAGU"
4 
5 """
6 A simple callback function that stores
7 a structure sample into a list
8 """
9 def store_structure(s, data):
10  if s:
11  data.append(s)
12 
13 
14 """
15 First we prepare a fold_compound object
16 """
17 
18 # create model details
19 md = RNA.md()
20 
21 # activate unique multibranch loop decomposition
22 md.uniq_ML = 1
23 
24 # create fold compound object
25 fc = RNA.fold_compound(sequence, md)
26 
27 # compute MFE
28 (ss, mfe) = fc.mfe()
29 
30 # rescale Boltzmann factors according to MFE
31 fc.exp_params_rescale(mfe)
32 
33 # compute partition function to fill DP matrices
34 fc.pf()
35 
36 """
37 Now we are ready to perform Boltzmann sampling
38 """
39 
40 # 1. backtrace a single sub-structure of length 10
41 print "%s" % fc.pbacktrack5(10)
42 
43 
44 # 2. backtrace a single sub-structure of length 50
45 print "%s" % fc.pbacktrack5(50)
46 
47 # 3. backtrace multiple sub-structures of length 10 at once
48 for s in fc.pbacktrack5(20, 10):
49  print "%s" % s
50 
51 # 4. backtrace multiple sub-structures of length 50 at once
52 for s in fc.pbacktrack5(100, 50):
53  print "%s" % s
54 
55 # 5. backtrace a single structure (full length)
56 print "%s" % fc.pbacktrack()
57 
58 # 6. backtrace multiple structures at once
59 for s in fc.pbacktrack(100):
60  print "%s" % s
61 
62 # 7. backtrace multiple structures non-redundantly
63 for s in fc.pbacktrack(100, RNA.PBACKTRACK_NON_REDUNDANT):
64  print "%s" % s
65 
66 # 8. backtrace multiple structures non-redundantly (with resume option)
67 num_samples = 500
68 iterations = 15
69 d = None # pbacktrack memory object
70 s_list = list()
71 
72 for i in range(0, iterations):
73  d, ss = fc.pbacktrack(num_samples, d, RNA.PBACKTRACK_NON_REDUNDANT)
74  s_list = s_list + list(ss)
75 
76 for s in s_list:
77  print "%s" % s
78 
79 # 9. backtrace multiple sub-structures of length 50 in callback mode
80 ss = list()
81 i = fc.pbacktrack5(100, 50, store_structure, ss)
82 
83 for s in ss:
84  print "%s" % s
85 
86 # 10. backtrace multiple full-length structures in callback mode
87 ss = list()
88 i = fc.pbacktrack(100, store_structure, ss)
89 
90 for s in ss:
91  print "%s" % s
92 
93 # 11. non-redundantly backtrace multiple full-length structures in callback mode
94 ss = list()
95 i = fc.pbacktrack(100, store_structure, ss, RNA.PBACKTRACK_NON_REDUNDANT)
96 
97 for s in ss:
98  print "%s" % s
99 
100 # 12. non-redundantly backtrace multiple full length structures
101 # in callback mode with resume option
102 ss = list()
103 d = None # pbacktrack memory object
104 
105 for i in range(0, iterations):
106  d, i = fc.pbacktrack(num_samples, store_structure, ss, d, RNA.PBACKTRACK_NON_REDUNDANT)
107 
108 for s in ss:
109  print "%s" % s

RNAfold -p –MEA equivalent

1 #!/usr/bin/python
2 #
3 
4 import RNA
5 
6 seq = "AUUUCCACUAGAGAAGGUCUAGAGUGUUUGUCGUUUGUCAGAAGUCCCUAUUCCAGGUACGAACACGGUGGAUAUGUUCGACGACAGGAUCGGCGCACUA"
7 
8 # create fold_compound data structure (required for all subsequently applied algorithms)
9 fc = RNA.fold_compound(seq)
10 
11 # compute MFE and MFE structure
12 (mfe_struct, mfe) = fc.mfe()
13 
14 # rescale Boltzmann factors for partition function computation
15 fc.exp_params_rescale(mfe)
16 
17 # compute partition function
18 (pp, pf) = fc.pf()
19 
20 # compute centroid structure
21 (centroid_struct, dist) = fc.centroid()
22 
23 # compute free energy of centroid structure
24 centroid_en = fc.eval_structure(centroid_struct)
25 
26 # compute MEA structure
27 (MEA_struct, MEA) = fc.MEA()
28 
29 # compute free energy of MEA structure
30 MEA_en = fc.eval_structure(MEA_struct)
31 
32 # print everything like RNAfold -p --MEA
33 print("%s\n%s (%6.2f)" % (seq, mfe_struct, mfe))
34 print("%s [%6.2f]" % (pp, pf))
35 print("%s {%6.2f d=%.2f}" % (centroid_struct, centroid_en, dist))
36 print("%s {%6.2f MEA=%.2f}" % (MEA_struct, MEA_en, MEA))
37 print(" frequency of mfe structure in ensemble %g; ensemble diversity %-6.2f" % (fc.pr_structure(mfe_struct), fc.mean_bp_distance()))

Fun with Soft Constraints

1 import RNA
2 
3 seq1 = "CUCGUCGCCUUAAUCCAGUGCGGGCGCUAGACAUCUAGUUAUCGCCGCAA"
4 
5 # Turn-off dangles globally
6 RNA.cvar.dangles = 0
7 
8 # Data structure that will be passed to our MaximumMatching() callback with two components:
9 # 1. a 'dummy' fold_compound to evaluate loop energies w/o constraints, 2. a fresh set of energy parameters
10 mm_data = { 'dummy': RNA.fold_compound(seq1), 'params': RNA.param() }
11 
12 # Nearest Neighbor Parameter reversal functions
13 revert_NN = {
14  RNA.DECOMP_PAIR_HP: lambda i, j, k, l, f, p: - f.eval_hp_loop(i, j) - 100,
15  RNA.DECOMP_PAIR_IL: lambda i, j, k, l, f, p: - f.eval_int_loop(i, j, k, l) - 100,
16  RNA.DECOMP_PAIR_ML: lambda i, j, k, l, f, p: - p.MLclosing - p.MLintern[0] - (j - i - k + l - 2) * p.MLbase - 100,
17  RNA.DECOMP_ML_ML_STEM: lambda i, j, k, l, f, p: - p.MLintern[0] - (l - k - 1) * p.MLbase,
18  RNA.DECOMP_ML_STEM: lambda i, j, k, l, f, p: - p.MLintern[0] - (j - i - k + l) * p.MLbase,
19  RNA.DECOMP_ML_ML: lambda i, j, k, l, f, p: - (j - i - k + l) * p.MLbase,
20  RNA.DECOMP_ML_ML_ML: lambda i, j, k, l, f, p: 0,
21  RNA.DECOMP_ML_UP: lambda i, j, k, l, f, p: - (j - i + 1) * p.MLbase,
22  RNA.DECOMP_EXT_STEM: lambda i, j, k, l, f, p: - f.E_ext_loop(k, l),
23  RNA.DECOMP_EXT_EXT: lambda i, j, k, l, f, p: 0,
24  RNA.DECOMP_EXT_STEM_EXT: lambda i, j, k, l, f, p: - f.E_ext_loop(i, k),
25  RNA.DECOMP_EXT_EXT_STEM: lambda i, j, k, l, f, p: - f.E_ext_loop(l, j),
26  RNA.DECOMP_EXT_EXT_STEM1: lambda i, j, k, l, f, p: - f.E_ext_loop(l, j-1),
27  }
28 
29 # Maximum Matching callback function (will be called by RNAlib in each decomposition step)
30 def MaximumMatching(i, j, k, l, d, data):
31  return revert_NN[d](i, j, k, l, data['dummy'], data['params'])
32 
33 # Create a 'fold_compound' for our sequence
34 fc = RNA.fold_compound(seq1)
35 
36 # Add maximum matching soft-constraints
37 fc.sc_add_f(MaximumMatching)
38 fc.sc_add_data(mm_data, None)
39 
40 # Call MFE algorithm
41 (s, mm) = fc.mfe()
42 
43 # print result
44 print "%s\n%s (MM: %d)\n" % (seq1, s, -mm)
45