Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/bioinfologics/sdg
Browse files Browse the repository at this point in the history
  • Loading branch information
gonzalogacc committed Aug 23, 2019
2 parents cf62c3d + 4db2db1 commit 4400a82
Show file tree
Hide file tree
Showing 14 changed files with 154 additions and 274 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ SET(source_files ${source_files}
src/sdglib/batch_counter/BatchKmersCounter.cc
deps/xxhash/xxhash.c
src/sdglib/mappers/SequenceMapper.cc
src/sdglib/processors/LinkageMaker.cc)
src/sdglib/processors/LinkageMaker.cc src/sdglib/mappers/GraphSelfAligner.cc src/sdglib/mappers/GraphSelfAligner.hpp)

## Libraries
add_library(sdg SHARED
Expand Down
33 changes: 30 additions & 3 deletions interfaces/SequenceDistanceGraph.i.in
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include "sdglib/views/NodeView.hpp"
#include "sdglib/mappers/SequenceMapper.hpp"
#include "sdglib/mappers/GraphSelfAligner.hpp"


#include "sdglib/factories/KMerFactory.hpp"
Expand Down Expand Up @@ -75,8 +76,9 @@
%feature("director") DistanceGraph;
%feature("director") SequenceDistanceGraph;

// Pure types
%template(vectorInt) std::vector<int>;
%template(vectorChar) std::vector<char>;
%template(vectorUChar) std::vector<unsigned char>;
%template(vectorDouble) std::vector<double>;
%template(vectorFloat) std::vector<float>;
%template(vectorString) std::vector<std::string>;
Expand All @@ -89,9 +91,12 @@
%template(usetUINT64) std::unordered_set<uint64_t>;
%template(usetUINT128) std::unordered_set<__uint128_t, int128_hash>;

//SG types
%template(vectorSGNode) std::vector<int64_t>;
%template(vectorvectorSGNode) std::vector<std::vector<int64_t>>;

%template(vectorPairIntInt) std::vector<std::pair<int32_t, int32_t>>;
%template(vectorvectorPairIntInt) std::vector<std::vector<std::pair<int32_t, int32_t>>>;
%template(vectorPairBoolUINT64) std::vector<std::pair<bool, uint64_t>>;
%template(SGNodePair) std::pair<int64_t, int64_t>;
%template(vectorSGNodePair) std::vector<std::pair<int64_t, int64_t>>;
%template(setSGNodePair) std::set<std::pair<int64_t, int64_t>>;
Expand Down Expand Up @@ -139,6 +144,11 @@ std::string __repr__() {
__REPR__();
};

// Adds string-conversion helpers to the wrapped SequenceMatch type.
// NOTE(review): __STR__ / __REPR__ are macros defined elsewhere in this interface file;
// presumably they expand to __str__() / __repr__() methods — confirm against their definitions.
%extend SequenceMatch{
__STR__();
__REPR__();
};

%extend KmerIDX{
__STR__();
__REPR__();
Expand Down Expand Up @@ -219,6 +229,7 @@ __commit__ = "@GIT_COMMIT_HASH@"

%include "sdglib/views/NodeView.hpp"
%include "sdglib/mappers/SequenceMapper.hpp"
%include "sdglib/mappers/GraphSelfAligner.hpp"

%include "sdglib/indexers/UniqueKmerIndex.hpp"

Expand Down Expand Up @@ -329,8 +340,24 @@ __commit__ = "@GIT_COMMIT_HASH@"
return oss.str();
}
}

%template(vectorvectorLongReadMapping) std::vector<std::vector<LongReadMapping>>;
%template(vectorSequenceMatch) std::vector<SequenceMatch>;
// String conversions for the wrapped std::vector<SequenceMatch>.
%extend std::vector<SequenceMatch>{
    // Short summary: only reports how many matches the vector holds.
    const std::string __repr__() {
        return "<Vector: "+std::to_string(self->size())+" SequenceMatches>";
    }
    // Full listing: one "<match>" per line, comma separated, wrapped in brackets.
    // An empty vector is rendered as "[]".
    const std::string __str__() {
        if (self->empty()) return "[]";
        std::ostringstream listing;
        listing<<"["<<std::endl;
        auto current=self->begin();
        // First element goes out without a leading separator...
        listing<<"<"<<*current<<">";
        // ...every following element is preceded by ",\n" so no trailing comma is ever written.
        for (++current;current!=self->end();++current) {
            listing<<","<<std::endl<<"<"<<*current<<">";
        }
        listing<<std::endl<<"]";
        return listing.str();
    }
}
%template(vectorvectorSequenceMatch) std::vector<std::vector<SequenceMatch>>;
%template(vectorTagNeighbour) std::vector<TagNeighbour>;
%template(vectorvectorTagNeighbour) std::vector<std::vector<TagNeighbour>>;
%template(vectorHaplotypeScore) std::vector<HaplotypeScore>;
Expand Down
4 changes: 2 additions & 2 deletions src/sdglib/datastores/LinkedReadsDatastore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ void LinkedReadsDatastore::load_index(std::string _filename){

uint64_t sname=0;
fread( &sname, sizeof(sname), 1, fd);
default_name.resize(sname);
fread( (char *) default_name.data(), sizeof(char), sname, fd);
name.resize(sname);
fread( (char *) name.data(), sizeof(char), sname, fd);

fread( &readsize,sizeof(readsize),1,fd);
fread(&s,sizeof(s),1,fd); read_tag.resize(s);
Expand Down
2 changes: 1 addition & 1 deletion src/sdglib/datastores/LongReadsDatastore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ void LongReadsDatastore::load_index(std::string &file) {

input_file.read((char*)&fPos, sizeof(fPos));

sdglib::read_string(input_file, default_name);
sdglib::read_string(input_file, name);

input_file.seekg(fPos);
sdglib::read_flat_vector(input_file, read_to_fileRecord);
Expand Down
4 changes: 2 additions & 2 deletions src/sdglib/datastores/PairedReadsDatastore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ void PairedReadsDatastore::load_index(){

uint64_t sname=0;
fread( &sname, sizeof(sname), 1, fd);
default_name.resize(sname);
fread( (char *) default_name.data(), sizeof(char), sname, fd);
name.resize(sname);
fread( (char *) name.data(), sizeof(char), sname, fd);

fread( &readsize,sizeof(readsize),1,fd);

Expand Down
2 changes: 1 addition & 1 deletion src/sdglib/graph/DistanceGraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ NodeView DistanceGraph::get_nodeview(sgNodeID_t n) {
return NodeView(this,n);
}

std::vector<NodeView> DistanceGraph::get_all_nodeviews(bool include_disconnected) {
std::vector<NodeView> DistanceGraph::get_all_nodeviews(bool include_disconnected) const {
uint64_t c=0;
for (auto nidx=0;nidx<sdg.nodes.size();++nidx) {
auto &n=sdg.nodes[nidx];
Expand Down
2 changes: 1 addition & 1 deletion src/sdglib/graph/DistanceGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class DistanceGraph {
* @param include_disconnected If set to false and a node is disconnected, it won't be included in the result
* @return A NodeView list containing one per node in the graph unless include_disconnected is set to false, then only nodes with links are included
*/
std::vector<NodeView> get_all_nodeviews(bool include_disconnected=true);
std::vector<NodeView> get_all_nodeviews(bool include_disconnected=true) const;
DistanceGraph& operator=(const DistanceGraph &o);

friend std::ostream& operator<<(std::ostream &os, const DistanceGraph &dg);
Expand Down
25 changes: 25 additions & 0 deletions src/sdglib/mappers/GraphSelfAligner.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//
// Created by Bernardo Clavijo (EI) on 2019-08-22.
//

#include <sdglib/indexers/NKmerIndex.hpp>
#include <sdglib/mappers/SequenceMapper.hpp>
#include "GraphSelfAligner.hpp"
#include <sdglib/views/NodeView.hpp>

/**
 * Maps the sequence of every node against the graph and stores the hits in `matches`.
 *
 * `matches` is cleared and resized to dg.sdg.nodes.size(); hits for a node are stored at
 * index llabs(node_id). For each NodeView returned by dg.get_all_nodeviews(), the node's
 * sequence is mapped with a SequenceMapper built from (dg, k, max_kfreq). Hits whose target
 * node (ignoring orientation) equals the query node id are discarded; the remaining hits are
 * appended to the node's bucket, which is then sorted with SequenceMatch's ordering.
 */
void GraphSelfAligner::self_align() {
    matches.clear();
    matches.resize(dg.sdg.nodes.size());
    // NOTE(review): a single mapper is shared by all OpenMP threads below; this assumes
    // map_sequence() is safe to call concurrently and that the mapper is ready to use
    // right after construction — TODO confirm against SequenceMapper.
    SequenceMapper sm(dg,k,max_kfreq);
    auto nvs=dg.get_all_nodeviews();
    // Unsigned 64-bit index: matches the type of nvs.size() and cannot overflow on large graphs.
#pragma omp parallel for schedule(static,200)
    for (uint64_t i=0;i<nvs.size();++i){
        auto &nv=nvs[i];
        const auto nid=nv.node_id();
        // Each iteration only touches the bucket of its own node, so buckets are not shared
        // between iterations as long as every NodeView refers to a different node.
        auto &node_matches=matches[llabs(nid)];
        for (auto &m:sm.map_sequence(nv.sequence().c_str(),nid)){
            // Skip hits of the node onto itself (in either orientation).
            if (llabs(m.node)!=nid){
                node_matches.emplace_back(nid,m.node,m.qStart,m.qEnd,m.nStart,m.nEnd,m.score);
            }
        }
        std::sort(node_matches.begin(),node_matches.end());
    }
}
23 changes: 23 additions & 0 deletions src/sdglib/mappers/GraphSelfAligner.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//
// Created by Bernardo Clavijo (EI) on 2019-08-22.
//

#pragma once

#include <sdglib/graph/DistanceGraph.hpp>
#include <sdglib/types/MappingTypes.hpp>

/**
 * Aligns the node sequences of a graph against the graph itself.
 *
 * The graph is held by const reference, so it must outlive this object.
 * Results are produced by self_align() and left in `matches`.
 */
class GraphSelfAligner {

public:
    /**
     * @param _dg graph whose nodes will be aligned (stored by reference, not copied)
     * @param _k value stored in `k` and handed to the SequenceMapper by self_align()
     * @param _max_kfreq value stored in `max_kfreq` and handed to the SequenceMapper by self_align()
     */
    GraphSelfAligner(const DistanceGraph &_dg, int _k=31, int _max_kfreq=200)
        : dg(_dg),
          k(_k),
          max_kfreq(_max_kfreq) {}

    /**
     * Convenience overload for a SequenceDistanceGraph: converts it to a
     * const DistanceGraph reference and delegates to the constructor above.
     */
    explicit GraphSelfAligner(const SequenceDistanceGraph &_dg, int _k=31, int _max_kfreq=200)
        : GraphSelfAligner(static_cast<const DistanceGraph &>(_dg), _k, _max_kfreq) {}

    /** Recomputes `matches` from scratch; see the implementation file for details. */
    void self_align();

    const DistanceGraph &dg;                            // graph being aligned (not owned)
    int k;                                              // passed to SequenceMapper as its k argument
    int max_kfreq;                                      // passed to SequenceMapper as its max_kfreq argument
    std::vector<std::vector<SequenceMatch>> matches;    // one vector of matches per entry of dg.sdg.nodes
};
Loading

0 comments on commit 4400a82

Please sign in to comment.