Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0ca6c41
Surveyor: add filter options to build similarity and distance matrices
zorino Jan 13, 2015
324c512
Surveyor: change type verification with defined variables
zorino Jan 15, 2015
6540f7d
fix unused variable
zorino Mar 11, 2015
7bd03fb
Surveyor: storekeeper now uses defined values instead of int
zorino Mar 17, 2015
28b0d47
edit StoreKeeper.cpp physical from virtual colors loop now in N*LogN
zorino Apr 7, 2015
62131e3
edit comments in Mother
zorino Apr 13, 2015
ef1d529
edit typo in Mother.cpp comments
zorino Apr 14, 2015
bd004c5
edit output the name of the actors for debugging purpose
zorino Apr 15, 2015
1cacd6c
edit refactored the Readers comments
zorino Apr 15, 2015
1d8bc84
edit comments in Storekeeper.cpp
zorino Apr 16, 2015
74ca0ad
add BUILD to .gitignore
zorino Apr 30, 2015
9eb236f
fix: segfault of message StoreKeeper->KmerMatrixOwner
zorino May 25, 2015
07eb149
add multiple matrices output for filters [DEBUG]
zorino Jun 22, 2015
059b042
surveyor: many filters in 1 run
zorino Jul 20, 2015
7dd2cbc
fix undesirable and random print of kmerMatrix
zorino Oct 5, 2015
1ed9d2d
add normalized similarity and distance matrices
zorino Dec 14, 2015
a502b05
edit rm old comments
zorino Dec 14, 2015
5099a8e
add Surveyor scripts
zorino Nov 29, 2016
3bdd5f7
fix bug in upgma gentree
zorino Dec 5, 2016
725041c
add support to normalize matrix with another matrix norms
zorino Dec 15, 2016
c276f35
add matrix-transform script
zorino Dec 15, 2016
6702e82
edit treeclust hierarchical clustering method
zorino Dec 21, 2016
e8eda44
edit matrix-transform.py script
zorino Jan 5, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
Ray
test/
PREFIX
BUILD
REFIX
22 changes: 20 additions & 2 deletions code/Mock/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1611,11 +1611,29 @@ void Parameters::showUsage(){
showOptionDescription("RayOutput/Surveyor/DistanceMatrix.tsv is a distance matrix (kernel-based).");
cout << endl;

showOption("-read-sample-graph SampleName SampleGraphFile", "Reads a sample graph (generated with -write-kmers)");
showOption("-read-sample-graph SampleName SampleGraphFile", "Reads a sample graph (generated with -write-kmers from a Ray's assembly)");
cout<<endl;

showOption("-read-sample-assembly SampleName SampleAssemblyFile", "Reads an assembly (a fasta file)");
showOption("-read-sample-assembly SampleName SampleAssemblyFile", "Reads an assembly (fasta file)");
cout<<endl;
cout<<endl;

cout << " [Filtering] : you can filter the similarity and distance matrices with input datasets !" << endl;
cout << " Surveyor will output the global matrices and the filtered matrices as well " << endl;
cout << " .. you can associate many filters to a single output matrix by giving the same number -<X>" << endl;
cout << endl;

showOption("-filter-in-[assembly|graph]-<X> SampleName Sample[Assembly|Graph]File", "Incorporate only these dataset kmers when building the similarity and distance matrices");
showOptionDescription("If there's multiple instances of this option than the kmer will be incorporate if present in one dataset");
showOptionDescription("No need to read the dataset as a sample before using this option");
showOptionDescription("X is the number of the filter");

cout<<endl;
showOption("-filter-out-[assembly|graph]-<X> SampleName Sample[Assembly|Graph]File", "Filter out the dataset kmers when building the similarity and distance matrices");
showOptionDescription("No need to read the dataset as a sample before using this option");
showOptionDescription("X is the number of the filter");
cout<<endl;


showOption("-write-kmer-matrix", "Write a 0|1 kmer matrix into RayOutput/Surveyor/KmerMatrix.tsv");
showOptionDescription("Rows being all the kmers and columns being all the samples.");
Expand Down
16 changes: 8 additions & 8 deletions code/Surveyor/CoalescenceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,19 +85,19 @@ void CoalescenceManager::receive(Message & message) {

die();

} else if(tag == SET_KMER_LENGTH) {
} else if(tag == SET_KMER_INFO) {

int kmerLength = 0;
char * buffer = (char*)message.getBufferBytes();
memcpy(&kmerLength, buffer, sizeof(kmerLength));


if(m_kmerLength == 0)
m_kmerLength = kmerLength;

if(m_kmerLength != kmerLength) {

printName();
cout << " Error: the k-mer length is not the same in all input files !";
cout << "[CoalescenceManager] ERROR: the k-mer length is not the same in all input files : " << m_kmerLength;
cout << endl;
}

Expand All @@ -107,13 +107,13 @@ void CoalescenceManager::receive(Message & message) {
m_colorSpaceMode = false;

Message response;
response.setTag(SET_KMER_LENGTH_OK);
response.setTag(SET_KMER_INFO_OK);

int source = message.getSourceActor();

/*
printName();
cout << "DEBUG Sending SET_KMER_LENGTH_OK to " << source << endl;
cout << "DEBUG Sending SET_KMER_INFO_OK to " << source << endl;
*/

send(source, response);
Expand Down Expand Up @@ -172,8 +172,8 @@ void CoalescenceManager::receive(Message & message) {
m_storeLastActor = last;

printName();
cout << " is now acquainted with StoreKeeper actors from ";
cout << m_storeFirstActor << " to " << m_storeLastActor << endl;
cout << "[CoalescenceManager] is now acquainted with [StoreKeepers] from #";
cout << m_storeFirstActor << " to #" << m_storeLastActor << endl;

// allocate buffers too

Expand Down Expand Up @@ -368,7 +368,7 @@ bool CoalescenceManager::addKmerInBuffer(int producer, int & actor, int & sample

#if 0
printName();
cout << "sends bits to StoreKeeper # " << storageDestination;
cout << "sends bits to [StoreKeeper] #" << storageDestination;
cout << endl;
#endif

Expand Down
4 changes: 2 additions & 2 deletions code/Surveyor/CoalescenceManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ class CoalescenceManager : public Actor {
FIRST_TAG = 10150,
PAYLOAD,
PAYLOAD_RESPONSE,
SET_KMER_LENGTH,
SET_KMER_LENGTH_OK,
SET_KMER_INFO,
SET_KMER_INFO_OK,
INTRODUCE_STORE,
FLUSH_BUFFERS,
FLUSH_BUFFERS_OK,
Expand Down
35 changes: 9 additions & 26 deletions code/Surveyor/GenomeAssemblyReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ using namespace std;

#include <string.h>

// TODO: need to know the kmer size
// DONE: need to know the kmer size

GenomeAssemblyReader::GenomeAssemblyReader() {

Expand All @@ -52,21 +52,12 @@ void GenomeAssemblyReader::receive(Message & message) {

int type = message.getTag();

/*
printName();
cout << "received tag " << type << endl;
*/

if(type == START_PARTY) {

startParty(message);

} else if(type == CoalescenceManager::PAYLOAD_RESPONSE) {

/*
printName();
cout << " DEBUG readLine because PAYLOAD_RESPONSE" << endl;
*/
// read the next line now !
readKmer();
}
Expand All @@ -82,7 +73,7 @@ void GenomeAssemblyReader::startParty(Message & message) {
m_loaded = 0;

printName();
cout <<"opens file " << m_fileName << endl;
cout << "[AssemblyReader] opens file " << m_fileName << endl;

m_parent = message.getSourceActor();

Expand All @@ -105,9 +96,6 @@ void GenomeAssemblyReader::readKmer() {
string badParent = "";
string badChild = "";

// ofstream outFile;
// outFile.open("kmers-created.txt", ios::app);

if(m_kmerReader.hasAnotherKmer()){

m_kmerReader.fetchNextKmer(sequence);
Expand Down Expand Up @@ -149,7 +137,7 @@ void GenomeAssemblyReader::manageCommunicationForNewKmer(string & sequence, Cove
if(m_loaded == 0) {

Message aMessage;
aMessage.setTag(CoalescenceManager::SET_KMER_LENGTH);
aMessage.setTag(CoalescenceManager::SET_KMER_INFO);

int length = sequence.length();
aMessage.setBuffer(&length);
Expand Down Expand Up @@ -206,14 +194,10 @@ void GenomeAssemblyReader::manageCommunicationForNewKmer(string & sequence, Cove

position += sizeof(m_sample);

// maybe: accumulate many objects before flushing it.
// we can go up to MAXIMUM_MESSAGE_SIZE_IN_BYTES bytes.
// maybe: accumulate many objects before flushing it.
// we can go up to MAXIMUM_MESSAGE_SIZE_IN_BYTES bytes.

/*
printName();
cout << " got data line " << buffer;
cout << " sending PAYLOAD to " << m_aggregator << endl;
*/
// Sending PAYLOAD to the CoalescenceManager
Message message;
message.setTag(CoalescenceManager::PAYLOAD);
message.setBuffer(messageBuffer);
Expand All @@ -228,10 +212,9 @@ void GenomeAssemblyReader::manageCommunicationForNewKmer(string & sequence, Cove
#endif

int period = 1000000;
if(m_loaded % period == 0) {
if(m_loaded % period == 0 && m_loaded > 0) {
printName();
cout << " loaded " << m_loaded << " sequences" << endl;

cout << "[AssemblyReader] loaded " << m_loaded << " sequences" << endl;
}
m_loaded ++;
send(m_aggregator, message);
Expand All @@ -246,7 +229,7 @@ void GenomeAssemblyReader::setFileName(string & fileName, int sample) {

#if 0
printName();
cout << " DEBUG setFileName " << m_fileName << endl;
cout << "DEBUG setFileName " << m_fileName << endl;
#endif

}
Expand Down
1 change: 0 additions & 1 deletion code/Surveyor/GenomeAssemblyReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#define GenomeAssemblyReaderHeader

#include <RayPlatform/actors/Actor.h>
/* #include <RayPlatform/files/FileReader.h> */

#include "GenomeAssemblyReader.h"
#include "CoalescenceManager.h"
Expand Down
34 changes: 10 additions & 24 deletions code/Surveyor/GenomeGraphReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
along with Ray Surveyor. If not, see <http://www.gnu.org/licenses/>.
*/

// TODO: validate that the kmer length is the same for this file
// DONE: validate that the kmer length is the same for this file
// and the provided -k argument

#include "GenomeGraphReader.h"
Expand Down Expand Up @@ -47,21 +47,12 @@ void GenomeGraphReader::receive(Message & message) {

int type = message.getTag();

/*
printName();
cout << "received tag " << type << endl;
*/

if(type == START_PARTY) {

startParty(message);

} else if(type == CoalescenceManager::PAYLOAD_RESPONSE) {

/*
printName();
cout << " DEBUG readLine because PAYLOAD_RESPONSE" << endl;
*/
// read the next line now !
readLine();
}
Expand All @@ -83,7 +74,7 @@ void GenomeGraphReader::startParty(Message & message) {
m_loaded = 0;

printName();
cout <<"opens file " << m_fileName << endl;
cout << "[GraphReader] opens file " << m_fileName << endl;

m_parent = message.getSourceActor();

Expand Down Expand Up @@ -120,11 +111,11 @@ void GenomeGraphReader::readLine() {
printName();

if(m_bad) {
cout << " Error: file " << m_fileName << " does not exist";
cout << "[GraphReader] Error: file " << m_fileName << " does not exist";
cout << endl;

} else {
cout << " finished reading file " << m_fileName;
cout << "[GraphReader] finished reading file " << m_fileName;
cout << " got " << m_loaded << " objects" << endl;
}

Expand Down Expand Up @@ -181,7 +172,7 @@ void GenomeGraphReader::readLine() {
if(m_loaded == 0) {

Message aMessage;
aMessage.setTag(CoalescenceManager::SET_KMER_LENGTH);
aMessage.setTag(CoalescenceManager::SET_KMER_INFO);

int length = sequence.length();
aMessage.setBuffer(&length);
Expand Down Expand Up @@ -235,14 +226,10 @@ void GenomeGraphReader::readLine() {

position += sizeof(m_sample);

// maybe: accumulate many objects before flushing it.
// we can go up to MAXIMUM_MESSAGE_SIZE_IN_BYTES bytes.
// maybe: accumulate many objects before flushing it.
// we can go up to MAXIMUM_MESSAGE_SIZE_IN_BYTES bytes.

/*
printName();
cout << " got data line " << buffer;
cout << " sending PAYLOAD to " << m_aggregator << endl;
*/
// Sending PAYLOAD to the CoalescenceManager
Message message;
message.setTag(CoalescenceManager::PAYLOAD);
message.setBuffer(messageBuffer);
Expand All @@ -257,10 +244,9 @@ void GenomeGraphReader::readLine() {
#endif

int period = 1000000;
if(m_loaded % period == 0) {
if(m_loaded % period == 0 && m_loaded > 0) {
printName();
cout << " loaded " << m_loaded << " sequences" << endl;

cout << "[GraphReader] loaded " << m_loaded << " sequences" << endl;
}
m_loaded ++;
send(m_aggregator, message);
Expand Down
23 changes: 17 additions & 6 deletions code/Surveyor/KmerMatrixOwner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <fstream>
#include <string>
#include <sstream>
#include <bitset>
using namespace std;

#include <math.h>
Expand Down Expand Up @@ -70,17 +71,18 @@ void KmerMatrixOwner::receive(Message & message) {

} else if(tag == PUSH_KMER_SAMPLES) {

vector<bool> samplesWithKmer;
vector<char> samplesWithKmer;

int offset = 0;

Kmer kmer;
offset += kmer.load(buffer);
int numberOfSamples = m_sampleNames->size();

char * bufferForSamples = buffer + offset;

for(int i=0; i<numberOfSamples; ++i){
bool state = bufferForSamples[i];
char state = bufferForSamples[i];
samplesWithKmer.push_back(state);
}

Expand All @@ -92,17 +94,18 @@ void KmerMatrixOwner::receive(Message & message) {

} else if(tag == PUSH_KMER_SAMPLES_END) {

vector<bool> samplesWithKmer;
vector<char> samplesWithKmer;

int offset = 0;

Kmer kmer;
offset += kmer.load(buffer);
int numberOfSamples = m_sampleNames->size();

char * bufferForSamples = buffer + offset;

for(int i=0; i<numberOfSamples; ++i){
bool state = bufferForSamples[i];
char state = bufferForSamples[i];
samplesWithKmer.push_back(state);
}

Expand All @@ -119,17 +122,25 @@ void KmerMatrixOwner::receive(Message & message) {
coolMessage.setTag(KMER_MATRIX_IS_READY);
send(m_mother, coolMessage);
m_kmerMatrixFile.close();

string kmerMatrix = m_kmerMatrix.str();
printName();
cout << "[KmerMatrixOwner] printed the Kmers Matrix: ";
cout << kmerMatrix << endl;
}

}
}


void KmerMatrixOwner::dumpKmerMatrixBuffer(Kmer & kmer, vector<bool> & samplesWithKmer, bool force) {
void KmerMatrixOwner::dumpKmerMatrixBuffer(Kmer & kmer, vector<char> & samplesWithKmer, bool force) {

m_kmerMatrix << kmer.idToWord(m_parameters->getWordSize(),0);
for(int i =0; i < (signed) samplesWithKmer.size(); ++i){
// int checkpoint = 500;
for(int i=0; i < (signed) samplesWithKmer.size(); ++i){
m_kmerMatrix << "\t" << samplesWithKmer[i];
}

m_kmerMatrix << endl;

flushFileOperationBuffer(force, &m_kmerMatrix, &m_kmerMatrixFile, CONFIG_FILE_IO_BUFFER_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion code/Surveyor/KmerMatrixOwner.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class KmerMatrixOwner : public Actor {
ostringstream m_kmerMatrix;
ofstream m_kmerMatrixFile;

void dumpKmerMatrixBuffer(Kmer & kmer, vector<bool> & samplesWithKmer, bool force);
void dumpKmerMatrixBuffer(Kmer & kmer, vector<char> & samplesWithKmer, bool force);
void createKmersMatrixOutputFile();

void printMatrixHeader();
Expand Down
Loading