/*
 * Copyright (C) 2014 Daniel Taliun, Johann Gamper and Cristian Pattaro. All rights reserved.
 *
 * This file is part of S-MIG++.
 *
 * S-MIG++ is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * S-MIG++ is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with S-MIG++. If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include <time.h>
#include <map>
#include <vector>
#include <mpi.h>

#include "db/include/Db.h"
#include "algorithms/include/ContourBuilder.h"
#include "algorithms/include/MIG.h"

using namespace std;

// Names of the command line options recognised by check_options().
static const char* HELP = "--help";
static const char* VERSION = "--version";
static const char* HAPMAP2 = "--hapmap2";
static const char* VCF = "--vcf";
static const char* MAF = "--maf";
static const char* REGION = "--region";
static const char* LDRATIO = "--ld-ratio";
static const char* CI = "--ci";
static const char* SAMPLES = "--samples";
static const char* PROBABILITY = "--probability";
static const char* SEED = "--seed";
static const char* MESSAGE = "--message";
static const char* OUT = "--out";

// Suffix appended to the output prefix to build the haplotype blocks file name.
static const char* BLOCKS_SUFFIX = ".blocks.gz";

// Default value of message_size (number of cells per MPI message).
static const unsigned int SEND_RECV_N_CELLS = 10000u;

// Parsed command line: option name -> list of the arguments that followed it.
// The lists are allocated in save_options() and released in clean_options().
// Keys are ordered with auxiliary::bool_strcmp_ignore_case (presumably a
// case-insensitive comparison -- confirm against the auxiliary header).
map<const char*, vector<const char*>*, bool(*)(const char*, const char*)> options(auxiliary::bool_strcmp_ignore_case);
// Shared iterator over `options`, reused by the option handling functions.
map<const char*, vector<const char*>*, bool(*)(const char*, const char*)>::iterator options_it;
// Argument list of the option that is currently being filled or inspected.
vector<const char*>* arguments = NULL;

// Input format and file names; assigned in check_options().
const char* input_files_format = NULL;
const char* input_phase_file = NULL;
const char* input_map_file = NULL;
const char* output_file = NULL;

// Settings with their defaults; overwritten in check_options() when the
// corresponding option is present on the command line.
double maf_threshold = 0.0;
unsigned long int region_start = 0u;
unsigned long int region_end = numeric_limits<unsigned long int>::max();
const char* ci_method = AlgorithmFactory::WP;
double ld_ratio = 0.95;
double samples = 0.01;
double probability = 0.99;
unsigned long int seed = 0ul;
unsigned int message_size = SEND_RECV_N_CELLS;
// True when the '--region' option was specified.
bool region = false;
// 0 means that main() derives the value from the number of markers and `samples`.
unsigned int n_segments = 0u;
// 0 means that main() derives the value from the estimated contour area.
unsigned int window = 0u;
// End-of-parse position reported by strtod() in check_options().
char* endptr = NULL;

/*
 * Writes the program version, followed by the copyright and the GPL notice,
 * to the standard output.
 */
void print_version() {
	// Version and copyright.
	cout << "S-MIG++ 1.0.0 MPI" << endl << endl
		<< "Copyright (C) 2014 Daniel Taliun, Johann Gamper and Cristian Pattaro." << endl
		<< "All rights reserved." << endl << endl;

	// Redistribution terms.
	cout << "This is free software: you can redistribute it and/or modify" << endl
		<< "it under the terms of the GNU General Public License as published by" << endl
		<< "the Free Software Foundation, either version 3 of the License, or" << endl
		<< "(at your option) any later version." << endl << endl;

	// Warranty disclaimer.
	cout << "This software is distributed in the hope that it will be useful," << endl
		<< "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
		<< "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the" << endl
		<< "GNU General Public License for more details." << endl << endl;

	// Pointer to the license text, followed by two empty lines.
	cout << "You should have received a copy of the GNU General Public License" << endl
		<< "along with S-MIG++. If not, see <http://www.gnu.org/licenses/>." << endl
		<< endl << endl;
}


/*
 * Writes the command line description to the standard output: a short
 * description of the program, usage examples, the mandatory and optional
 * arguments, and the columns of the output file.
 */
void print_help() {
	// General description.
	cout << "Description:" << endl << endl
		<< " This is the S-MIG++ algorithm's version for distributed computations." << endl
		<< " The S-MIG++ algorithm is a sampling based, memory and runtime efficient" << endl
		<< " haplotype blocks recognition algorithm that uses Gabriel et al. (2002)" << endl
		<< " criteria to define blocks based on |D'| LD values between SNPs. It was" << endl
		<< " designed to handle large datasets with millions of SNPs and thousands of" << endl
		<< " samples." << endl
		<< endl << endl;

	// Usage examples, one per supported input format.
	cout << "Usage:" << endl
		<< endl << " (with HapMap format)" << endl
		<< " mpirun -np <processes> smigpp " << HAPMAP2 << " <legend file> <phase file> " << OUT << " <output prefix>" << endl;

	cout << endl << " (with VCF format)" << endl
		<< " mpirun -np <processes> smigpp " << VCF << " <file> " << OUT << " <output prefix>" << endl
		<< endl << endl;

	// Mandatory arguments.
	cout << "Mandatory arguments:" << endl << endl;

	cout << " " << HAPMAP2
		<< "\tTwo files in HapMap format: (1) the legend file with marker" << endl
		<< "\t\tpositions; (2) the file with phased genotypes." << endl
		<< endl << endl;

	cout << " " << VCF
		<< "\t\tVCF file with phased genotypes."
		<< endl << endl;

	cout << " " << OUT
		<< "\t\tPrefix for the output file with non-overlapping haplotype" << endl
		<< "\t\tblocks. The output file is compressed with GZIP and is named" << endl
		<< "\t\tas <output prefix>.blocks.gz." << endl
		<< endl << endl;

	// Optional arguments.
	cout << "Optional arguments:" << endl << endl;

	cout << " " << MAF
		<< "\t\tThreshold for the Minor Allele Frequency (MAF). Must be from" << endl
		<< "\t\tthe [0, 0.5) interval. The default value is 0, i.e. all" << endl
		<< "\t\tmonomorphic markers are filtered out." << endl
		<< endl << endl;

	cout << " " << REGION
		<< "\tThe start and end positions (in base-pairs) of the chromosomal" << endl
		<< "\t\tregion to be partitioned." << endl
		<< endl << endl;

	cout << " " << LDRATIO
		<< "\tThreshold for the ratio of high LD SNP pairs to all" << endl
		<< "\t\tinformative SNP pairs inside a haplotype block. The default" << endl
		<< "\t\tand recommended value is 0.95." << endl
		<< endl << endl;

	cout << " " << CI
		<< "\t\t|D'| confidence interval (CI) estimation method. Supported" << endl
		<< "\t\tmethods are WP = Wall and Pritchard (2003) method;" << endl
		<< "\t\tAV = approximate variance estimator by Zapata et al. (1997)." << endl
		<< "\t\tThe default and recommended method is WP." << endl
		<< endl << endl;

	cout << " " << SAMPLES
		<< "\tProportion of SNP pairs to sample. Must be from the (0; 1)" << endl
		<< "\t\tinterval. The recommended values are from 0.01 to 0.05." << endl
		<< "\t\tThe default value is 0.01." << endl
		<< endl << endl;

	cout << " " << PROBABILITY
		<< "\tThe probability of the correctly estimated upper limits for" << endl
		<< "\t\thaplotype blocks boundaries. Must be from the (0; 1) interval." << endl
		<< "\t\tIt is highly recommended to set this value to 0.95 or greater." << endl
		<< "\t\tThe default value is 0.99." << endl
		<< endl << endl;

	cout << " " << SEED
		<< "\t\tSeed for the random sampling. Must be a positive integer" << endl
		<< "\t\tnumber. The default value is generated from the current time." << endl
		<< endl << endl;

	cout << " " << MESSAGE
		<< "\tThe maximal number of cells that are send/received in MPI" << endl
		<< "\t\tcommunications between processes. The default value is 10,000." << endl
		<< endl << endl;

	cout << " " << HELP
		<< "\t\tDisplay this information." << endl
		<< endl << endl;

	cout << " " << VERSION
		<< "\tDisplay S-MIG++ version information." << endl
		<< endl << endl;

	// Layout of the output file.
	cout << "Output:" << endl << endl;

	cout << " The first meta-information lines in the output file start with '#' symbol." << endl
		<< " The output file consists of the following 12 columns:" << endl
		<< " BLOCK_NAME\t\tGenerated unique block name." << endl
		<< " FIRST_SNP\t\tName of the first SNP in block." << endl
		<< " LAST_SNP\t\tName of the last SNP in block." << endl
		<< " FIRST_SNP_ID\t\tIndex of the first SNP in block with respect to the" << endl
		<< " \t\t\tfiltered SNPs." << endl
		<< " LAST_SNP_ID\t\tIndex of the last SNP in block with respect to the" << endl
		<< " \t\t\tfiltered SNPs." << endl
		<< " START_BP\t\tThe base-pair position of the first SNP in block." << endl
		<< " END_BP\t\t\tThe base-pair position of the last SNP in block." << endl
		<< " N_SNPS\t\t\tNumber of SNPs in block." << endl
		<< " N_HAPS\t\t\tNumber of haplotypes in block." << endl
		<< " N_UNIQUE_HAPS\t\tNumber of unique haplotypes in block." << endl
		<< " N_COMMON_HAPS\t\tNumber of common (which appear more than once)" << endl
		<< " \t\t\thaplotypes in block." << endl
		<< " N_HAPS_DIVERSITY\tThe haplotype diversity in block (Patil et al., 2001)." << endl
		<< " \t\t\t1 - low diversity, 0 - high diversity." << endl
		<< endl;
}

void clean_options() {
	for (options_it = options.begin(); options_it != options.end(); ++options_it) {
		delete options_it->second;
	}
	options.clear();
}

/*
 * Splits the command line into options and their arguments and stores the
 * result in the global `options` map.
 *
 * A token that starts with "--" selects the option (an already seen option
 * is reused, so its arguments accumulate); every other token is appended to
 * the argument list of the most recently selected option.
 *
 * args    - number of entries in argv.
 * argv    - command line tokens; argv[0] is skipped.
 * verbose - when true, a diagnostic is printed for a token that precedes
 *           any option.
 *
 * Returns false when a token appears before the first option, true otherwise.
 */
bool save_options(int args, char* argv[], bool verbose) {
	int position = 1;

	while (position < args) {
		char* token = argv[position];
		++position;

		if (auxiliary::strcmp_ignore_case(token, "--", 2) == 0) {
			// Option name: switch to its argument list, creating it on first use.
			options_it = options.find(token);
			if (options_it == options.end()) {
				arguments = new vector<const char*>();
				options.insert(pair<const char*, vector<const char*>*>(token, arguments));
			} else {
				arguments = options_it->second;
			}
			continue;
		}

		if (arguments == NULL) {
			// A value without a preceding option cannot be attributed to anything.
			if (verbose) {
				cout << "Vague command line argument '" << token << "' was specified."  << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}

		arguments->push_back(token);
	}

	return true;
}

bool check_options(bool verbose) {
	options_it = options.find(HELP);
	if (options_it != options.end()) {
		if (options_it->second->size() != 0) {
			if (verbose) {
				cout << "Option '" << HELP << "' doesn't require any arguments." << endl;
			}
			return false;
		}

		if (verbose) {
			print_help();
		}

		return false;
	}

	options_it = options.find(VERSION);
	if (options_it != options.end()) {
		if (options_it->second->size() != 0) {
			if (verbose) {
				cout << "Option '" << VERSION << "' doesn't require any arguments." << endl;
			}
			return false;
		}

		if (verbose) {
			print_version();
		}

		return false;
	}

	if ((options.count(VCF) > 0) && (options.count(HAPMAP2) > 0)) {
		if (verbose) {
			cout << "Specify only one option: '"<< VCF << "' or '" << HAPMAP2 << "'." << endl;
			cout << "Specify '" << HELP << "' for the command line description." << endl;
		}
		return false;
	}

	if ((options.count(VCF) == 0) && (options.count(HAPMAP2) == 0)) {
		if (verbose) {
			cout << "Specify input file names (command line option '" << VCF << "' or '" << HAPMAP2 << "')." << endl;
			cout << "Specify '" << HELP << "' for the command line description." << endl;
		}
		return false;
	}

	options_it = options.find(VCF);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify VCF file name after '"<< VCF << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		input_files_format = Db::VCF;
		input_phase_file = arguments->at(0);
	}

	options_it = options.find(HAPMAP2);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 2) {
			if (verbose) {
				cout << "Specify legend file name and phase file name after '" << HAPMAP2 << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		input_files_format = Db::HAPMAP2;
		input_map_file = arguments->at(0);
		input_phase_file = arguments->at(1);
	}

	options_it = options.find(REGION);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 2) {
			if (verbose) {
				cout << "Specify region start and end positions after '" << REGION << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		region = true;
		region_start = strtoul(arguments->at(0), NULL, 0);
		if (region_start == 0) {
			if (verbose) {
				cout << "Invalid region start position after '" << REGION << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		region_end = strtoul(arguments->at(1), NULL, 0);
		if (region_end == 0) {
			if (verbose) {
				cout << "Invalid region end position after '" << REGION << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		if (region_start >= region_end) {
			if (verbose) {
				cout << "Region start position must be greater than region end position after '" << REGION << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
	}

	options_it = options.find(MAF);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify MAF threshold after '" << MAF << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		maf_threshold = strtod(arguments->at(0), &endptr);
		if (*endptr != '\0') {
			if (verbose) {
				cout << "Invalid MAF threshold after '" << MAF << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
	}

	options_it = options.find(LDRATIO);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify ratio of LD to informative SNP pairs after '" << LDRATIO << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		ld_ratio = strtod(arguments->at(0), &endptr);
		if ((*endptr != '\0') || (ld_ratio <= 0.0) || (ld_ratio >= 1.0)) {
			if (verbose) {
				cout << "Invalid ration of LD to informative SNP pairs after '" << LDRATIO << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
	}

	options_it = options.find(CI);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify |D'| CI computation method (" << AlgorithmFactory::WP << " or " << AlgorithmFactory::AV << ") after '" << CI << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		ci_method = arguments->at(0);
	}

	options_it = options.find(SAMPLES);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify proportion of SMP pairs to sample after '" << SAMPLES << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		samples = strtod(arguments->at(0), &endptr);
		if ((*endptr != '\0') || (samples <= 0.0) || (samples >= 1.0)) {
			if (verbose) {
				cout << "Invalid proportion of SNP pairs to sample after '" << SAMPLES << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
	}

	options_it = options.find(PROBABILITY);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify probability of the correct estimations after '" << PROBABILITY << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		probability = strtod(arguments->at(0), &endptr);
		if ((*endptr != '\0') || (probability <= 0.0) || (probability >= 1.0)) {
			if (verbose) {
				cout << "Invalid probability of the correct estimations after '" << PROBABILITY << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
	}

	options_it = options.find(OUT);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify output file name (exactly one) after '" << OUT << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		output_file = arguments->at(0);
	} else {
		if (verbose) {
			cout << "Specify output file name after '" << OUT << "' option." << endl;
			cout << "Specify '" << HELP << "' for the command line description." << endl;
		}
		return false;
	}

	options_it = options.find(SEED);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify random seed after '"<< SEED << "' option." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		seed = strtoul(arguments->at(0), NULL, 0);
	}

	options_it = options.find(MESSAGE);
	if (options_it != options.end()) {
		arguments = options_it->second;
		if (arguments->size() != 1) {
			if (verbose) {
				cout << "Specify number of segments in MPI message (command line option '" << MESSAGE << "')." << endl;
				cout << "Specify '" << HELP << "' for the command line description." << endl;
			}
			return false;
		}
		message_size = strtol(arguments->at(0), NULL, 0);
	}

	return true;
}

/*
 * Program entry point.
 *
 * Every process parses the command line, loads the input data and creates a
 * ContourBuilder. Process 0 then repeatedly scatters cells to all processes
 * (including itself), gathers the processed samples and feeds them to the
 * builder; afterwards it computes, selects and writes the haplotype blocks.
 * The other processes only receive cells, process them and send the results
 * back. A message whose first element equals numeric_limits<unsigned int>::max()
 * tells every process to leave the sampling loop.
 *
 * Returns 0 in all cases; errors are reported on the standard output.
 */
int main(int args, char** argv) {
	int process_id = 0;
	int n_processes = 0;

	unsigned long int matrix_size = 0ul;

	unsigned int cells_buffer_size = 0u;
	unsigned int samples_buffer_size = 0u;

	unsigned int* recv_cells_buffer = NULL;
	unsigned int* send_samples_buffer = NULL;
	unsigned int n_cells = 0u;

	// Used by process 0 only. Declared at function scope so that they are
	// released after the try block even when an exception was thrown.
	unsigned int* scatter_buffer = NULL;
	unsigned int* gather_buffer = NULL;
	char* blocks_file = NULL;

	unsigned int* estimated_contour = NULL;
	unsigned long int contour_area = 0ul;

	double mpi_start_time = 0.0;
	double mpi_elapsed_time = 0.0;
	double mpi_sampling_time = 0.0;
	double mpi_migpp_time = 0.0;

	unsigned long int n_local_computations = 0ul;
	unsigned long int n_sampling_computations = 0ul;
	unsigned long int n_migpp_computations = 0ul;

	MPI::Init(args, argv);
	MPI::COMM_WORLD.Set_errhandler(MPI::ERRORS_THROW_EXCEPTIONS);

	try {
		Db db;

		process_id = MPI::COMM_WORLD.Get_rank();

		// Only process 0 prints diagnostics; all processes stop on invalid input.
		if (!save_options(args, argv, process_id == 0)) {
			clean_options();
			MPI::Finalize();
			return 0;
		}

		if (!check_options(process_id == 0)) {
			clean_options();
			MPI::Finalize();
			return 0;
		}

		cells_buffer_size = message_size * 2u + 1u;
		samples_buffer_size = message_size * 3u;

		recv_cells_buffer = (unsigned int*)malloc(sizeof(unsigned int) * cells_buffer_size);
		if (recv_cells_buffer == NULL) {
			throw Exception(__FILE__, __LINE__, "Error in memory allocation.");
		}

		send_samples_buffer = (unsigned int*)malloc(sizeof(unsigned int) * samples_buffer_size);
		if (send_samples_buffer == NULL) {
			throw Exception(__FILE__, __LINE__, "Error in memory allocation.");
		}

		if (!region) {
			if (strcmp(input_files_format, Db::VCF) == 0) {
				db.load_vcf(input_phase_file);
			} else if (strcmp(input_files_format, Db::HAPMAP2) == 0) {
				db.load_hapmap2(input_map_file, input_phase_file);
			}
		} else {
			if (strcmp(input_files_format, Db::VCF) == 0) {
				db.load_vcf(input_phase_file, region_start, region_end);
			} else if (strcmp(input_files_format, Db::HAPMAP2) == 0) {
				db.load_hapmap2(input_map_file, input_phase_file, region_start, region_end);
			}
		}

		db.mask(maf_threshold);

		// Derive the number of segments: start from n_markers * sqrt(samples)
		// and decrease it until (n_markers / n_segments)^2 * samples >= 1.
		if (n_segments == 0u) {
			n_segments = db.get_n_markers() * sqrt(samples);
			while (n_segments > 1u) {
				if (pow(db.get_n_markers() / n_segments, 2.0) * samples >= 1.0) {
					break;
				}
				--n_segments;
			}
		}

		ContourBuilder builder(db, ci_method, ProfileFactory::FS, probability, ld_ratio, samples, n_segments, true, seed);

		if (process_id == 0) {
			n_processes = MPI::COMM_WORLD.Get_size();

			scatter_buffer = (unsigned int*)malloc(n_processes * sizeof(unsigned int) * cells_buffer_size);
			if (scatter_buffer == NULL) {
				throw Exception(__FILE__, __LINE__, "Error in memory allocation.");
			}

			gather_buffer = (unsigned int*)malloc(n_processes * sizeof(unsigned int) * samples_buffer_size);
			if (gather_buffer == NULL) {
				throw Exception(__FILE__, __LINE__, "Error in memory allocation.");
			}

			blocks_file = (char*)malloc((strlen(output_file) + strlen(BLOCKS_SUFFIX) + 1u) * sizeof(char));
			if (blocks_file == NULL) {
				throw Exception(__FILE__, __LINE__, "Error in memory allocation.");
			}
			blocks_file[0u] = '\0';
			strcat(blocks_file, output_file);
			strcat(blocks_file, BLOCKS_SUFFIX);

			// Number of marker pairs, n * (n - 1) / 2. The product is always
			// even, so the integer division is exact (a division by 2.0 would
			// round large values through double).
			matrix_size = (((unsigned long int)db.get_n_markers()) * (((unsigned long int)db.get_n_markers()) - 1ul)) / 2ul;

			cout << "===============================================================================" << endl;
			cout << "Input:" << endl;
			cout << " File: " << input_phase_file << endl;
			if (region) {
				cout << " Region: [" << region_start << ", " << region_end << "]" << endl;
			}
			cout << " Markers: " << db.get_all_n_markers() << endl;
			cout << " Non-monomorphic markers (MAF > " << maf_threshold << "): " << db.get_n_markers() << endl;
			cout << " Haplotypes: " << db.get_n_haplotypes() << endl;
			cout << " Memory used per process (Mb): " << db.get_memory_usage() << endl;
			cout << endl;

			cout << "===============================================================================" << endl;
			cout << "Sampling matrix:" << endl;
			cout << " Segments: " << n_segments << endl;
			cout << " Cells: " << builder.get_n_cells() << endl;
			cout << " Mean cell side length (markers): " << ((double)db.get_n_markers() / (double)builder.get_n_segments()) << endl;
			cout << endl;

			cout << "===============================================================================" << endl;
			cout << "MPI buffers: " << endl;
			cout << " Processes: " << n_processes  << endl;
			cout << " Send/receive message size (cells): " << message_size << endl;
			cout << " Send message size (Kb): " << ((sizeof(unsigned int) * cells_buffer_size) / 1024.0) << endl;
			cout << " Receive message size (Kb): " << ((sizeof(unsigned int) * samples_buffer_size) / 1024.0) << endl;
			cout << " Memory used (Mb): " << ((n_processes * sizeof(unsigned int) * (cells_buffer_size + samples_buffer_size)) / 1048576.0) << endl;
			cout << endl;

			cout << "===============================================================================" << endl;
			cout << "Distributed sampling: " << endl;
			cout << " LD ratio: " << ld_ratio << endl;
			cout << " D' CI method: " << ci_method << endl;
			cout << " Samples (%): " << samples << endl;
			cout << " Probability: " << probability << endl;
			cout << " Seed: " << builder.get_seed() << endl;
			cout << " Significance: " << builder.get_alpha() << endl;

//			BEGIN: DISTRIBUTED SAMPLING
			mpi_start_time = MPI::Wtime();
			do {
				// Fill one message per process.
				for (int i = 0; i < n_processes; ++i) {
					builder.mpi_get_segments(scatter_buffer + i * cells_buffer_size, message_size);
				}

				// The first message is empty: mark all messages with the stop value.
				if (scatter_buffer[0u] == 0u) {
					for (int i = 0; i < n_processes; ++i) {
						(scatter_buffer + i * cells_buffer_size)[0u] = numeric_limits<unsigned int>::max();
					}
				}

				MPI::COMM_WORLD.Scatter(scatter_buffer, cells_buffer_size, MPI::UNSIGNED, recv_cells_buffer, cells_buffer_size, MPI::UNSIGNED, 0);

				n_cells = recv_cells_buffer[0u];
				if (n_cells == numeric_limits<unsigned int>::max()) {
					break;
				}

				builder.mpi_process_segments(recv_cells_buffer, send_samples_buffer);

				MPI::COMM_WORLD.Gather(send_samples_buffer, samples_buffer_size, MPI::UNSIGNED, gather_buffer, samples_buffer_size, MPI::UNSIGNED, 0);

				builder.mpi_build(scatter_buffer, gather_buffer, n_processes, message_size);
			} while (true);
			mpi_sampling_time = MPI::Wtime() - mpi_start_time;

			// The counters are unsigned long, so the MPI datatype must be
			// MPI::UNSIGNED_LONG (MPI::UNSIGNED describes unsigned int).
			n_local_computations = builder.get_n_computations();
			MPI::COMM_WORLD.Reduce(&n_local_computations, &n_sampling_computations, 1, MPI::UNSIGNED_LONG, MPI::SUM, 0);

			cout << " Performed computations: " << n_sampling_computations << " (" << ((double)n_sampling_computations / (double)matrix_size) << ")" << endl;
			cout << " Performed computations per cell: " << (n_sampling_computations / (double)builder.get_n_cells()) << endl;
			cout << " Estimated haplotype blocks contour size: " << builder.get_estimated_contour_area() << " (" << ((long double)builder.get_estimated_contour_area() / (long double)matrix_size) << ")" << endl;
			cout << " Time used (sec): " << mpi_sampling_time << endl;
			cout << endl;
//			END: DISTRIBUTED SAMPLING

			estimated_contour = builder.get_estimated_contour();
			contour_area = builder.get_estimated_contour_area();

			MIG mig(db);

			// Default window: 5% of the estimated contour area per marker.
			if (window == 0u) {
				window = (unsigned int)((0.05 * (double)contour_area) / (double)db.get_n_markers());
			}

//			BEGIN: COMPUTING HAPLOTYPE BLOCKS
			cout << "===============================================================================" << endl;
			cout << "Computing haplotype blocks: " << endl;
			cout << " D' CI method: " << ci_method << endl;
			cout << " Window: " << window << endl;

			mpi_start_time = MPI::Wtime();
			n_migpp_computations = mig.compute_candidate_blocks_migpp(estimated_contour, ci_method, window);
			mpi_migpp_time = MPI::Wtime() - mpi_start_time;

			cout << " Performed computations: " << n_migpp_computations << " (" << ((long double)n_migpp_computations / (long double)matrix_size) << ")" << endl;
			cout << " Candidate haplotype blocks: " << mig.get_n_strong_pairs() << endl;
			cout << " Memory used (Mb): " << mig.get_max_memory_usage() << endl;
			cout << " Time used (sec): " <<  mpi_migpp_time << endl;
			cout << endl;
//			END: COMPUTING HAPLOTYPE BLOCKS

//			BEGIN: SELECTING NON-OVERLAPPING HAPLOTYPE BLOCKS
			cout << "===============================================================================" << endl;
			cout << "Selecting non-overlapping haplotype blocks: " << endl;

			mpi_start_time = MPI::Wtime();
			mig.sort_candidate_blocks();
			mig.select_final_blocks();
			mpi_elapsed_time = MPI::Wtime() - mpi_start_time;

			cout << " Final haplotype blocks: " << mig.get_n_blocks() << endl;
			cout << " Time used (sec): " << mpi_elapsed_time << endl;
			cout << endl;
//			END: SELECTING NON-OVERLAPPING HAPLOTYPE BLOCKS

//			BEGIN: WRITING HAPLOTYPE BLOCKS
			cout << "===============================================================================" << endl;
			cout << "Writing haplotype blocks: " << endl;

			mpi_start_time = MPI::Wtime();
			mig.write_blocks(blocks_file, WriterFactory::GZIP, input_phase_file, input_map_file, maf_threshold, region, region_start, region_end, ci_method);
			mpi_elapsed_time = MPI::Wtime() - mpi_start_time;

			cout << " Non-overlapping haplotype blocks: " << blocks_file << endl;
			cout << " Time used (sec): " << mpi_elapsed_time << endl;
			cout << endl;
//			END: WRITING HAPLOTYPE BLOCKS

			// The reported total covers the sampling and the block computation only.
			cout << "===============================================================================" << endl;
			cout << " Total time used (sec): " << setprecision(10) << (mpi_sampling_time + mpi_migpp_time) << endl;
			cout << endl;
		} else {
			// Worker: process the received cells until the stop value arrives.
			do {
				MPI::COMM_WORLD.Scatter(NULL, cells_buffer_size, MPI::UNSIGNED, recv_cells_buffer, cells_buffer_size, MPI::UNSIGNED, 0);

				n_cells = recv_cells_buffer[0u];
				if (n_cells == numeric_limits<unsigned int>::max()) {
					break;
				}

				builder.mpi_process_segments(recv_cells_buffer, send_samples_buffer);

				MPI::COMM_WORLD.Gather(send_samples_buffer, samples_buffer_size, MPI::UNSIGNED, NULL, samples_buffer_size, MPI::UNSIGNED, 0);
			} while (true);

			// Must use the same datatype as the matching Reduce call of process 0.
			n_local_computations = builder.get_n_computations();
			MPI::COMM_WORLD.Reduce(&n_local_computations, NULL, 1, MPI::UNSIGNED_LONG, MPI::SUM, 0);
		}
	} catch (MPI::Exception &e) {
		cout << endl;
		cout << "Process ID: " << process_id << endl;
		cout << e.Get_error_string() << endl;
	} catch (Exception &e) {
		cout << endl;
		cout << "Process ID: " << process_id << endl;
		cout << e.what() << endl;
	}

	// Common cleanup for the normal and the exceptional path.
	if (scatter_buffer != NULL) {
		free(scatter_buffer);
		scatter_buffer = NULL;
	}

	if (gather_buffer != NULL) {
		free(gather_buffer);
		gather_buffer = NULL;
	}

	if (blocks_file != NULL) {
		free(blocks_file);
		blocks_file = NULL;
	}

	if (recv_cells_buffer != NULL) {
		free(recv_cells_buffer);
		recv_cells_buffer = NULL;
	}

	if (send_samples_buffer != NULL) {
		free(send_samples_buffer);
		send_samples_buffer = NULL;
	}

	MPI::Finalize();

	clean_options();

	return 0;
}
