-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcmdMa.cpp
437 lines (406 loc) · 18.7 KB
/
cmdMa.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
/**
* @file cmdMa.cpp
* @brief Parses the program options
* @details
* Sets up the computational graph and executes it.
* @author Markus Schmidt
* @copyright
Copyright 2018 Markus Schmidt, Arne Kutzner
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#endif
#include "ma/container/fMIndex.h"
#include "ma/container/nucSeq.h"
#include "ma/container/pack.h"
#include "ma/module/fileReader.h"
#include "ma/module/fileWriter.h"
#include "ma/util/execution-context.h"
#include "ma/util/export.h"
#include "ms/util/version.h"
#include "util/debug.h"
/// @cond DOXYGEN_SHOW_SYSTEM_INCLUDES
#include <iostream>
#include <string.h>
#include <thread>
/// @endcond
using namespace libMA;
using namespace libMS;
/// Banner printed at the top of the help message. Its length is also used as the
/// total line width for wrapping help text (see the uses of sHeader.size( ) in this file).
const std::string sHeader =
"========================================= The Modular Aligner =========================================";
/// Prefix placed in front of every option head ("-s, --name <type> [default]") in the help output.
const std::string sIndentOptions = " ";
/**
 * @brief Prints a text to stdout word by word and wraps it into the description column.
 * @details
 * The text is split at whitespace; every word is written followed by a single blank.
 * The usable width of a line is sHeader.size( ) - sIndentDesc.size( ). Whenever the next
 * word would make the current line reach or exceed that width, a line break followed by
 * sIndentDesc is written first. The output always ends with a line break.
 * The first line is NOT indented here; the caller positions the cursor beforehand.
 * @param sStr the text to print.
 * @param sIndentDesc the indentation written at the start of every wrapped line.
 */
void fromatedPrint( const std::string sStr, const std::string& sIndentDesc )
{
    // number of characters that fit between the description indent and the end of the header
    const size_t uiLineWidth = sHeader.size( ) - sIndentDesc.size( );
    size_t uiUsed = 0;

    std::istringstream xWords( sStr );
    std::string sToken;
    while( xWords >> sToken )
    {
        const bool bLineFull = uiUsed + sToken.size( ) >= uiLineWidth;
        if( bLineFull )
        {
            // continue in the description column of the next line
            std::cout << std::endl << sIndentDesc;
            uiUsed = 0;
        } // if
        std::cout << sToken << " ";
        uiUsed += sToken.size( ) + 1; // + 1 for the trailing blank
    } // while
    std::cout << std::endl;
} // function
/**
 * @brief Prints the help entry of a single command line option to stdout.
 * @details
 * The entry consists of the option head
 * "<sIndentOptions>[-<cShort>, ]--<name> <<type>> [<default>]", followed by the wrapped
 * description and, on a separate line, the wrapped set description enclosed in "{ ... }".
 * If the head is shorter than sIndentDesc.size( ) - 4 it is padded with blanks up to the
 * description column; otherwise the description starts on a new line.
 * @param sName name of the option; blanks are replaced by underscores.
 * @param cShort one-letter form of the option; omitted if equal to
 * AlignerParameterBase::NO_SHORT_DEFINED.
 * @param sTypeName text shown between '<' and '>'.
 * @param sDefaultVal text shown between '[' and ']'.
 * @param sDescription description of the option.
 * @param sSetDescription additional text printed as "{ ... }" below the description.
 * @param sIndentDesc indentation of the description column.
 */
void printOption( std::string sName,
                  const char cShort,
                  const std::string& sTypeName,
                  const std::string& sDefaultVal,
                  const std::string& sDescription,
                  const std::string& sSetDescription,
                  const std::string& sIndentDesc )
{
    // option names are displayed with underscores in place of blanks
    std::replace( sName.begin( ), sName.end( ), ' ', '_' );

    // assemble the head of the entry
    std::string sHead = sIndentOptions;
    if( cShort != AlignerParameterBase::NO_SHORT_DEFINED )
    {
        sHead += "-";
        sHead += cShort;
        sHead += ", ";
    } // if
    sHead += "--" + sName + " <" + sTypeName + "> [" + sDefaultVal + "]";

    std::cout << sHead;
    if( sHead.size( ) < sIndentDesc.size( ) - 4 )
    {
        // short head: fill up with blanks until the description column is reached
        size_t uiColumn = sHead.size( );
        while( uiColumn < sIndentDesc.size( ) )
        {
            std::cout << " ";
            ++uiColumn;
        } // while
    } // if
    else
    {
        // long head: the description starts on its own line
        std::cout << std::endl << sIndentDesc;
    } // else

    fromatedPrint( sDescription, sIndentDesc );
    std::cout << sIndentDesc;
    fromatedPrint( "{ " + sSetDescription + " }", sIndentDesc );
    std::cout << std::endl;
} // function
/**
 * @brief Prints the help message of the command line aligner to stdout.
 * @details
 * The output consists of, in this order:
 * - the header banner,
 * - the presetting option, listing the names of all parameter sets of rManager,
 * - the hard-coded general options (Index, In, Mate_In, Create_Index) followed by all
 *   parameters registered in the general parameter set,
 * - if bFull is set: the parameters of the selected parameter set merged with those of
 *   pGlobalParams, grouped by category and sorted by name within each category
 *   (the MINIMIZER_PARAMETERS and SV_PARAMETERS categories are skipped),
 * - version, authors, the compile switches and the project URL.
 * @param rManager parameter set manager the options are read from.
 * @param bFull if false, the per-category options are omitted.
 */
void generateHelpMessage( ParameterSetManager& rManager, bool bFull = true )
{
    // the description column starts in the middle of the header line
    const std::string sIndentDesc( sHeader.size( ) / 2, ' ' );

    // prints the help entry of one registered parameter
    const auto fPrintParameter = [ & ]( const std::shared_ptr<AlignerParameterBase>& pParameter ) {
        printOption( pParameter->sName,
                     pParameter->cShort,
                     pParameter->type_name( ),
                     pParameter->asText( ),
                     pParameter->sDescription,
                     pParameter->sSetDesc,
                     sIndentDesc );
    }; // lambda

    std::cout << sHeader << std::endl;

    // presettings
    std::cout << "Available presettings:" << std::endl;
    // build a list of the form 'name_a', 'name_b'
    std::string sOptions = "'";
    for( auto& rPreset : rManager.xParametersSets )
    {
        std::string sPresetName = rPreset.second->sName;
        std::replace( sPresetName.begin( ), sPresetName.end( ), ' ', '_' );
        sOptions += sPresetName + "', '";
    } // for
    // drop the dangling ", '" that follows the last name
    sOptions.erase( sOptions.end( ) - 3, sOptions.end( ) );

    printOption(
        "Presetting",
        'p',
        "name",
        rManager.xParametersSets.begin( )->first,
        "Optimize aligner parameters for a selected sequencing technique. Available presettings are: " + sOptions + ".",
        "This parameter sets the presetting.",
        sIndentDesc );

    // general options
    std::cout << "General options:" << std::endl;
    printOption( "Index",
                 'x',
                 "file_name",
                 "",
                 "Filename of FMD-index. (A FMD-index can be generated via the --Create_Index option.) This option "
                 "must be set.",
                 "Independent of presettings.",
                 sIndentDesc );
    printOption( "In",
                 'i',
                 "file_name",
                 "",
                 "Filenames of Fasta/Fastq files containing reads. gz-compressed files are automatically decompressed. "
                 "Multiple files can be specified by a comma separated list. One file name must be provided at least.",
                 "Independent of presettings.",
                 sIndentDesc );
    printOption( "Mate_In",
                 'm',
                 "file_name",
                 "",
                 "Filenames of the mates in the case of paired reads. If this option is set, the aligner switches to "
                 "paired mode automatically. The number of reads given as mates must match the accumulated "
                 "number of reads provided via the 'in'-option.",
                 "Independent of presettings.",
                 sIndentDesc );
    printOption( "Create_Index",
                 'X',
                 "fasta_file_name,output_folder,index_name",
                 "",
                 "Generate a FMD-index for a Fasta file. 'fasta_file_name' has to be the file-path of the Fasta file "
                 "holding the genome used for index creation. 'output_folder' is the folder-path of the location used "
                 "for index storage. 'index_name' is the name used for identifying the new FMD-Index. In the context "
                 "of alignments, the genome-name is used for FMD-index selection.",
                 "Independent of presettings.",
                 sIndentDesc );

    // parameters registered in the general parameter set
    for( const auto& rCategory : rManager.pGeneralParameterSet->xpParametersByCategory )
        for( const auto& pParameter : rCategory.second )
            fPrintParameter( pParameter );

    if( bFull )
    {
        // merge the parameters of the selected presetting and the global parameters by category
        std::map<std::pair<size_t, std::string>, std::vector<std::shared_ptr<AlignerParameterBase>>> xCompleteMap;
        for( const auto& rCategory : rManager.getSelected( )->xpParametersByCategory )
        {
            auto& rvTarget = xCompleteMap[ rCategory.first ];
            rvTarget.insert( rvTarget.end( ), rCategory.second.begin( ), rCategory.second.end( ) );
        } // for
        for( const auto& rCategory : pGlobalParams->xpParametersByCategory )
        {
            auto& rvTarget = xCompleteMap[ rCategory.first ];
            rvTarget.insert( rvTarget.end( ), rCategory.second.begin( ), rCategory.second.end( ) );
        } // for

        // other options
        for( auto& rCategory : xCompleteMap )
        {
            // Do not print MSV options
            // Minimizers are currently not accessible for maCMD
            if( rCategory.first == MINIMIZER_PARAMETERS || rCategory.first == SV_PARAMETERS )
                continue;

            std::cout << rCategory.first.second << " options:" << std::endl;

            // give out options sorted by name (the map is local, so it can be sorted in place)
            std::sort( rCategory.second.begin( ),
                       rCategory.second.end( ),
                       []( const std::shared_ptr<AlignerParameterBase>& pLhs,
                           const std::shared_ptr<AlignerParameterBase>& pRhs ) { return pLhs->sName < pRhs->sName; } );

            for( const auto& pParameter : rCategory.second )
                fPrintParameter( pParameter );
        } // for
    } // if

    std::cout << "Version " << MA_VERSION << "\nBy Markus Schmidt & Arne Kutzner" << std::endl;

    // list the build switches this binary / library was compiled with
    std::cout << "Compiled with following switches:";
    if( bLibMaWithPython )
        std::cout << " WITH_PYTHON";
#ifdef POSTGRESQL
    std::cout << " WITH_POSTGRESQL";
#endif
#ifdef WITH_MYSQL
    std::cout << " WITH_MYSQL";
#endif
#ifdef WITH_ZLIB
    std::cout << " WITH_ZLIB";
#endif
#ifdef WITH_DB
    std::cout << " WITH_DB";
#endif
#if DEBUG_LEVEL > 0
    std::cout << " DEBUG_MODE";
#endif
    std::cout << "\nFor more information visit: https://github.com/ITBE-Lab/ma" << std::endl;
} // function
std::vector<fs::path> fsSplit( const std::string& sSubject, const std::string sRegex )
{
std::vector<fs::path> vVector;
for( std::string sPath : split( sSubject, sRegex ) )
vVector.push_back( fs::path( sPath ) );
return vVector;
} // function
/**
* main function
*/
int main( int argc, char* argv[] )
{
if( MA_VERSION != sLibMaVersion )
{
std::cerr << "Fatal error: cmbMA verion \"" << MA_VERSION << "\" does not match libMS version \""
<< sLibMaVersion << "\". Something went wrong during building/linking." << std::endl;
return 1;
} // if
ExecutionContext xExecutionContext;
// change the way output works to a simple -o for the command line aligner.
// Also disable Use Max Hardware concurrency parameter and set -t to max hardware_concurrency by default.
xExecutionContext.xParameterSetManager.pGeneralParameterSet->xSAMOutputTypeChoice->uiSelection = 2;
xExecutionContext.xParameterSetManager.pGeneralParameterSet->pbUseMaxHardareConcurrency->set( false );
xExecutionContext.xParameterSetManager.pGeneralParameterSet->piNumberOfThreads->set(
std::thread::hardware_concurrency( ) );
// remove not with respect to pbUseMaxHardareConcurrency in description...
xExecutionContext.xParameterSetManager.pGeneralParameterSet->piNumberOfThreads->sDescription =
"Number of threads used in the context of alignments.";
xExecutionContext.xParameterSetManager.pGeneralParameterSet->unregisterParameter(
xExecutionContext.xParameterSetManager.pGeneralParameterSet->xSAMOutputTypeChoice.pContent );
xExecutionContext.xParameterSetManager.pGeneralParameterSet->unregisterParameter(
xExecutionContext.xParameterSetManager.pGeneralParameterSet->xSAMOutputPath.pContent );
xExecutionContext.xParameterSetManager.pGeneralParameterSet->unregisterParameter(
xExecutionContext.xParameterSetManager.pGeneralParameterSet->pbUseMaxHardareConcurrency.pContent );
// set the mode...
for( int iI = 2; iI < argc; iI += 2 )
{
std::string sOptionName = argv[ iI - 1 ];
std::string sOptionValue = argv[ iI ];
if( sOptionName == "-p" || ParameterSetBase::uniqueParameterName( sOptionName ) == "--presetting" )
xExecutionContext.xParameterSetManager.setSelected( sOptionValue );
} // for
if( argc <= 1 )
{
generateHelpMessage( xExecutionContext.xParameterSetManager, false );
return 0;
} // if
try
{
for( int iI = 1; iI < argc; iI++ )
{
std::string sOptionName = argv[ iI ];
// we did this already
if( sOptionName == "-p" || ParameterSetBase::uniqueParameterName( sOptionName ) == "--presetting" )
{
iI++; // also ignore the following argument
continue;
} // if
if( sOptionName == "-x" || ParameterSetBase::uniqueParameterName( sOptionName ) == "--index" )
{
std::string sOptionValue = argv[ iI + 1 ];
const std::string s = xExecutionContext.xGenomeManager.loadGenome( sOptionValue );
if( !s.empty( ) )
throw std::runtime_error( s );
iI++; // also ignore the following argument
continue;
} // if
if( sOptionName == "-i" || ParameterSetBase::uniqueParameterName( sOptionName ) == "--in" )
{
std::string sOptionValue = argv[ iI + 1 ];
xExecutionContext.xReadsManager.vsPrimaryQueryFullFileName = fsSplit( sOptionValue, "," );
iI++; // also ignore the following argument
continue;
} // if
if( sOptionName == "-m" || ParameterSetBase::uniqueParameterName( sOptionName ) == "--matein" )
{
std::string sOptionValue = argv[ iI + 1 ];
xExecutionContext.xReadsManager.vsMateQueryFullFileName = fsSplit( sOptionValue, "," );
xExecutionContext.xParameterSetManager.getSelected( )->xUsePairedReads->set( true );
iI++; // also ignore the following argument
continue;
} // if
if( sOptionName == "-X" || ParameterSetBase::uniqueParameterName( sOptionName ) == "--createindex" )
{
std::string sOptionValue = argv[ iI + 1 ];
auto vsStrings = split( sOptionValue, "," );
if( vsStrings.size( ) != 3 )
throw std::runtime_error( "--Index needs exactly three parameters" );
xExecutionContext.xGenomeManager.makeIndexAndPackForGenome(
fs::path( vsStrings[ 1 ] ), //
fs::path( vsStrings[ 0 ] ), //
vsStrings[ 2 ], //
[]( const std::string s ) { std::cout << s << std::endl; } // lambda
);
return 0;
} // if
if( iI + 1 < argc && ( argv[ iI + 1 ][ 0 ] != '-' || is_number( std::string( argv[ iI + 1 ] ) ) ) )
{
std::string sOptionValue = argv[ iI + 1 ];
iI++; // have key value pair so next element is certainly no key
if( sOptionName[ 0 ] == '-' && sOptionName[ 1 ] != '-' && sOptionName.size( ) == 2 )
xExecutionContext.xParameterSetManager.byShort( sOptionName[ 1 ] )->setByText( sOptionValue );
else if( sOptionName[ 0 ] == '-' && sOptionName[ 1 ] == '-' && sOptionName.size( ) > 2 )
xExecutionContext.xParameterSetManager.byName( sOptionName.substr( 2, sOptionName.size( ) - 2 ) )
->setByText( sOptionValue );
else
throw std::runtime_error(
std::string( "unknown option type: " )
.append( sOptionName )
.append( ". Did you forget to add the '-' or '--' at the beginning?" ) );
} // if
else // boolean flag option
{
if( sOptionName[ 0 ] == '-' && sOptionName[ 1 ] != '-' && sOptionName.size( ) == 2 )
{
auto pX = std::dynamic_pointer_cast<AlignerParameter<bool>>(
xExecutionContext.xParameterSetManager.byShort( sOptionName[ 1 ] ) );
if( pX == nullptr )
throw std::runtime_error( "Parameters need to be provided as key value pairs" );
pX->set( true );
} // if
else if( sOptionName[ 0 ] == '-' && sOptionName[ 1 ] == '-' && sOptionName.size( ) > 2 )
{
auto pX = std::dynamic_pointer_cast<AlignerParameter<bool>>(
xExecutionContext.xParameterSetManager.byName(
sOptionName.substr( 2, sOptionName.size( ) - 2 ) ) );
if( pX == nullptr )
throw std::runtime_error( "Parameters need to be provided as key value pairs" );
pX->set( true );
} // else if
else
throw std::runtime_error(
std::string( "unknown option type: " )
.append( sOptionName )
.append( ". Did you forget to add the '-' or '--' at the beginning?" ) );
} // else
} // for
if( xExecutionContext.xParameterSetManager.pGeneralParameterSet->pbPrintHelpMessage->get( ) )
{
generateHelpMessage( xExecutionContext.xParameterSetManager );
return 0;
} // if
if( xExecutionContext.xParameterSetManager.pGeneralParameterSet->pbPrintVerstion->get( ) )
{
std::cout << MA_VERSION << std::endl;
return 0;
} // if
std::pair<int, double> xPreviousProgress = std::make_pair( -1, 0 );
std::cout << "starting alignment." << std::endl;
xExecutionContext.doAlign( [ & ] //
( double dProgress, int iCurrFile, int iFilesTotal ) //
{
dProgress = (int)( dProgress * 10 );
dProgress /= 10;
std::pair<int, double> xProgress = std::make_pair( iCurrFile, dProgress );
if( xProgress > xPreviousProgress )
{
std::cerr << "\rFile " << xProgress.first + 1 << " of " << iFilesTotal
<< ": " << xProgress.second << "% aligned. "
<< std::flush;
xPreviousProgress = xProgress;
} // if
return true; // always continue the alignment
} // lambda
);
std::cerr << "\rdone. " << std::endl;
} // try
catch( std::runtime_error& ex )
{
std::cerr << "Error:\n" << ex.what( ) << std::endl;
} // catch
catch( std::exception& ex )
{
std::cerr << "Error:\n" << ex.what( ) << std::endl;
} // catch
catch( ... )
{
std::cerr << "Error:\n"
<< "unknown exception encountered" << std::endl;
} // catch
return 0;
} // main function