00001
00002
00003
00004 #ifndef WEIGHTED_FOOTPRINT_DETECTOR_H
00005 #define WEIGHTED_FOOTPRINT_DETECTOR_H
00006
00007 #include <set>
00008 #include <string>
00009 #include <map>
00010 #include <list>
00011 #include <gmpxx.h>
00012 #include "weighted_bitsets.h"
00013 #include "weighted_bit_multi_set.h"
00014 #include "bit_multi_set.h"
00015 #include "triple.h"
00016
00017
00018
00019
00020
00021
00022
00023
00026 namespace bbq {
00027
/// List of (int, int, bool) triples; used as the output parameter type of
/// weighted_footprint_detector::get_frag_occurences().
/// NOTE(review): std::triple is not a standard-library type; presumably it is
/// provided by "triple.h" — confirm.
00029 typedef std::list< std::triple<int,int,bool> > match_list;
/// Map from a string key to a list of ints.
/// NOTE(review): judging by the name, the key is a group name and the values
/// are indices belonging to that group — confirm against the users of this type.
00030 typedef std::map<std::string,std::list<int> > group_to_index_list;
00031
/// Class template declaring the state and operations of a footprint detector,
/// parameterised on the bit-set type `w_bit_set`.
///
/// Only declarations are visible here; the member definitions are presumably
/// supplied by "weighted_footprint_detector.cpp", which this header includes
/// at its end. All semantic remarks below that go beyond the declared types
/// are therefore assumptions and are marked as such.
///
/// NOTE(review): the class declares a destructor and holds many raw pointers,
/// but declares neither a copy constructor nor a copy assignment operator.
/// Whether copying an instance is safe depends on who owns the pointed-to
/// memory in the unseen definitions — confirm.
00041 template<class w_bit_set>
00042 class weighted_footprint_detector
00043 {
00044
00045 public:
00046
/// Ordered set of w_bit_set values.
00050 typedef std::set< w_bit_set > arrangement;
/// Pair of iterators into an `arrangement` (presumably a [first, last) range — confirm).
00051 typedef std::pair< typename arrangement::iterator , typename arrangement::iterator > arrangement_range;
/// Same underlying type as `arrangement` (std::set<w_bit_set>), under a second name.
00052 typedef std::set< w_bit_set > cell_set;
00053
00054 private:
00055
// --- Integer parameters. Their meaning is defined by the implementation and
// --- is not visible in this header (k/kappa/K are presumably problem sizes).
00059 int k;
00060
00064 int kappa;
00065
00067 int K;
00068
/// Array of C strings. NOTE(review): presumably the input sequences — confirm.
00071 char** s;
00072
/// Integer count related to grouping; presumably set via set_group_characters() — confirm.
00079 int num_of_group_characters;
00080
/// Array of C strings; presumably one name per group (see get_groups()) — confirm.
00085 char** group_names;
00086
/// Integer array; presumably the size of each group — confirm.
00091 int* group_sizes;
00092
/// Array of integer arrays; presumably the member indices of each group — confirm.
00098 int** group_indices;
00099
00107
00108
/// Array of C strings; presumably names belonging to the entries of `s` — confirm.
00110 char** s_names;
00111
/// Second array of C strings (meaning not visible here).
00113 char** T;
00114
/// Array of C strings; presumably names belonging to the entries of `T` — confirm.
00116 char** T_names;
00117
// --- Three integer arrays; presumably per-sequence lengths/counts — confirm.
00119 int* n;
00120
00122 int* N;
00123
00126 int* m;
00127
/// Integer; presumably the number of best results to keep (see set_number_of_hits()) — confirm.
00134 int h;
00135
/// Integer; presumably how many of the `h` results have been collected so far — confirm.
00139 int h_so_far;
00140
/// Floating-point threshold (how it is applied is not visible here).
00146 float threshold;
00147
/// Flag; presumably enables enumerate_differences() — confirm.
00150 bool enum_differences;
00151
/// Integer; presumably set via set_delta() — confirm.
00154 int delta;
00155
/// Flag; presumably set via set_count_results() — confirm.
00159 bool count_results;
00160
/// Flag (meaning not visible here; possibly tracks whether reverse_compl() was applied — confirm).
00163 bool reverted;
00164
/// Integer selector; presumably set via set_algorithm() — confirm.
00168 int algorithm;
00169
/// Flag; presumably set via set_grouping() — confirm.
00175 bool grouped;
00176
/// Flag; presumably set via set_output_detail() — confirm.
00179 bool output_detail;
00180
/// Flag (presumably records whether any result was found — confirm).
00182 bool anything_found;
00183
/// Flag; presumably set via set_postscript() — confirm.
00185 bool ps_output;
00186
/// File name string; presumably set via set_ps_filename() — confirm.
00188 std::string ps_filename;
00189
/// Arbitrary-precision integer (GMP mpz_class); presumably a progress counter — confirm.
00192 mpz_class progress;
00193
/// Integer counter related to progress reporting (exact use not visible here).
00194 int prgr_count;
00195
/// Arbitrary-precision integer (GMP mpz_class); presumably the total number of instances — confirm.
00198 mpz_class num_of_inst;
00199
// --- Multi-level raw-pointer tables. Dimensions, contents and ownership are
// --- not visible in this header; see the definitions before touching them.
00202 int *** p;
00203
00206 bool *** R;
00207
00209 int ** lambda;
00210
00212 int * Lambda;
00213
/// Array of arrays of w_bit_set.
00218 w_bit_set ** C;
00219
/// Pointer to `arrangement` (a single object or an array — not visible here).
00225 arrangement* A;
00226
00228 int ** P;
00229
00233 int *** M;
00234
00235 int L;
00236
// --- Result bookkeeping (semantics presumed from the names — confirm).
/// Floating-point weight; presumably the best weight seen overall.
00239 float overall_best_weight;
00240
/// Floating-point weight; presumably the weight bounding the h-best list.
00243 float h_best_weight;
00244
/// Integer array; presumably the index vector of the best solution.
00247 int* best_nu;
00248
/// Integer array; presumably the most recently reported index vector.
00252 int* last_nu;
00253
/// Array of integer arrays; presumably one index vector per h-best entry.
00257 int** best_h_nu;
00258
/// Float array; presumably one weight per h-best entry.
00261 float* best_h_wghts;
00262
/// Array of w_bit_set; presumably one intersection per h-best entry.
00265 w_bit_set* best_h_intersections;
00266
/// Integer count of solutions (when it is updated is not visible here).
00268 int num_of_solutions;
00269
/// A w_bit_set held by value; presumably the intersection of the best solution.
00272 w_bit_set best_intersection;
00273
/// Flag; presumably set via set_weighted() — confirm.
00275 bool weighted;
00276
/// Integer selector; presumably set via set_weight_type() — confirm.
00282 int weight_type;
00283
/// Flag; presumably set via set_reverse() — confirm.
00287 bool reverse;
00288
/// Flag; presumably set via set_progress_report() — confirm.
00294 bool report_progress;
00295
/// Integer array; presumably built by compute_index_table() — confirm.
00297 int* index_table;
00298
/// Array of integer arrays; presumably filled by get_freq() — confirm.
00301 int** frequencies;
00302
/// Three-level integer table (name suggests pairwise frequencies — confirm).
00305 int*** di_frequencies;
00306
00307 public:
00308
/// Default constructor.
00310 weighted_footprint_detector();
00311
/// Constructor taking two ints, four char** arrays and three int* arrays.
/// NOTE(review): the parameters are unnamed; they presumably correspond to
/// the members k/kappa, s/s_names/T/T_names and n/N/m in some order —
/// confirm against the definition.
00312 weighted_footprint_detector(int,int,char**,char**,char**,char**,int*,int*,int*);
00313
/// Destructor (what it releases is not visible here).
00315 ~weighted_footprint_detector();
00316
/// No-argument operation; presumably resets/releases internal state — confirm.
00318 void clear();
00319
/// No-argument operation; presumably prepares the arrangement data (members A/C) — confirm.
00323 void init_arrangement_cells();
00324
// --- Three predicates over a set and an index j; presumably pruning tests
// --- used by the enum_* routines below — confirm.
// NOTE(review): the parameter name `M` is also the name of a data member; if
// the definitions use the same parameter name, the member is hidden there.
00327 bool bound(const w_bit_set& M, const unsigned int& j);
00328
00331 bool w_bound(const w_bit_set& M, const unsigned int& j);
00332
00337 bool h_best_w_bound(const w_bit_set& M, const unsigned int& j);
00338
// --- Four routines taking a set by value and an index j; presumably the
// --- (recursive) enumeration steps for the weighted / h-best variants — confirm.
00341 void enum_w_sets(w_bit_set M, int j);
00342
00346 void enum_h_best_w_sets(w_bit_set M, int j);
00347
00351 void enum_h_best_sets(w_bit_set M, int j);
00352
00355 void enum_sets(w_bit_set M, int j);
00356
/// No-argument driver; presumably starts the subset enumeration — confirm.
00358 void enumerate_subsets();
00359
/// No-argument driver; presumably the h-best variant of enumerate_subsets() — confirm.
00362 void enumerate_h_best_subsets();
00363
/// No-argument operation (details not visible here).
00365 void compute_intersections();
00366
/// Takes a weight, an int array and a set by non-const reference; presumably
/// inserts the candidate into the best_h_* members — confirm.
00373 void update_h_best_list(const float& wght, int* nu, w_bit_set& intersection);
00374
/// No-argument operation; presumably the h-best variant of compute_intersections() — confirm.
00377 void compute_h_best_intersections();
00378
/// Returns a w_bit_set computed from M (by value) and D (by non-const reference).
00380 w_bit_set difference(w_bit_set M, w_bit_set& D);
00381
/// Predicate over a set passed by non-const reference (criterion not visible here).
00383 bool test_diff_set(w_bit_set& M);
00384
/// Takes a set, an int array E and an int reference l; presumably writes the
/// elements of M into E and their count into l — confirm.
00387 void get_element_array(w_bit_set M, int* E, int& l);
00388
/// Takes an int named like the `delta` member (semantics not visible here).
00391 void enumerate_differences(int delta);
00392
/// Takes two indices and a match_list by reference (presumably an output list — confirm).
/// The spelling "occurences" is part of the interface and must be kept.
00395 void get_frag_occurences(int i, int j, match_list&);
00396
/// Takes two indices (details not visible here).
00398 void get_occurences(int i, int j);
00399
/// Takes two indices; presumably the grouped counterpart of get_occurences() — confirm.
00401 void get_group_occurences(int i, int j);
00402
/// Entry point taking an int and a float.
/// NOTE(review): the parameters are unnamed; presumably they feed `h` (or a
/// length) and `threshold` — confirm against the definition.
00404 void run(int,float);
00405
/// No-argument operation (name suggests reverse-complementing sequence data — confirm).
00409 void reverse_compl();
00410
/// Returns a float (presumably the best weight found — confirm).
00412 float get_result();
00413
/// No-argument output routine for the h-best results (destination not visible here).
00415 void print_h_results();
00416
/// Output routine for one result; returns a float (meaning not visible here).
00418 float print_single_result(int* nu, w_bit_set& intersection, const float& weight);
00419
/// Output routine; returns a float (meaning not visible here).
00421 float print_result();
00422
// --- PostScript output routines; the file name string is taken by value in
// --- the first two and by non-const reference in print_ps_h_results().
00425 void print_postscript(std::string filename);
00426
00429 void print_ps_labels(std::string filename);
00430
00433 void print_ps_h_results(std::string&);
00434
/// Writes one result to the given output stream (format not visible here).
/// NOTE(review): std::ostream is used although this header includes neither
/// <ostream> nor <iosfwd> directly; presumably it arrives via another include.
00437 void print_ps_single_result(int* nu, std::ostream& o, w_bit_set& intersection);
00438
00439
00440
/// Takes a set by non-const reference; presumably prints it using the stored names — confirm.
00443 void print_with_names(w_bit_set&);
00444
// --- Configuration setters. Each presumably assigns the similarly named
// --- data member (e.g. set_weighted -> weighted); confirm in the definitions.
00447 void set_number_of_hits(int);
00448
00450 void set_weighted(bool);
00451
00454 void set_count_results(bool);
00455
00458 void set_weight_type(int wt);
00459
00463 void set_reverse(bool);
00464
00466 void set_postscript(bool);
00467
00469 void set_ps_filename(std::string);
00470
00472 void set_progress_report(bool);
00473
// NOTE(review): the parameter name `A` is also the name of a data member.
00477 void set_algorithm(int A);
00478
00480 void set_grouping(bool);
00481
00484 void set_group_characters(int);
00485
/// No-argument operation; presumably fills group_names/group_sizes/group_indices — confirm.
00489 void get_groups();
00490
00492 void set_output_detail(bool);
00493
00497 void set_delta(int);
00498
00499
00500
/// Takes an int array and an int; returns bool (presumably advances an index
/// vector and reports whether that was possible — confirm).
00505 bool increase(int* nu,int I);
00506
/// Compares two int arrays; returns bool (length and criterion not visible here).
00515 bool differs(int*,int*);
00516
/// Returns a double for the index triple (i, j, nu); presumably a p-value — confirm.
00520 double p_value(int i, int j, int nu);
00521
/// No-argument operation; presumably fills `frequencies` / `di_frequencies` — confirm.
00525 void get_freq();
00526
/// Returns a float for the index triple (i, j, nu); presumably depends on weight_type — confirm.
00529 float get_weight(int i, int j, int nu);
00530
/// No-argument operation; presumably fills `index_table` — confirm.
00532 void compute_index_table();
00533 };
00534
/// Explicit instantiation definitions of weighted_footprint_detector for the
/// three bit-set types weighted_variable_bitset, weighted_bit_multi_set and
/// bit_multi_set.
/// NOTE(review): these sit in a header and come before the #include of
/// "weighted_footprint_detector.cpp" at the end of this file. An explicit
/// instantiation definition only instantiates members already defined at that
/// point, and the standard allows it at most once per program. Confirm how many
/// translation units include this header, or consider `extern template` plus a
/// single instantiating source file.
00535 template class weighted_footprint_detector<weighted_variable_bitset>;
00536 template class weighted_footprint_detector<weighted_bit_multi_set>;
00537 template class weighted_footprint_detector<bit_multi_set>;
00538
/// Convenience names for the three instantiations.
/// Detector over weighted_variable_bitset.
00539 typedef weighted_footprint_detector<weighted_variable_bitset> w_set_footprint_detector;
/// Detector over bit_multi_set. Note that, despite the `w_` prefix of the
/// alias, the element type here is the one without "weighted" in its name.
00540 typedef weighted_footprint_detector<bit_multi_set> w_mset_footprint_detector;
/// Detector over weighted_bit_multi_set.
00541 typedef weighted_footprint_detector<weighted_bit_multi_set> w_wmset_footprint_detector;
00542
00544
00545 }
00546
00547
00548 #include "weighted_footprint_detector.cpp"
00549
00550 #endif
00551
00552