-
Notifications
You must be signed in to change notification settings - Fork 237
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update text processing tools (#1385)
* add header * add conditional * move delimiter to 'select by field' branch * bump tool version * fix access to list parameter * Drop `-b` option Doesn't make sense given we restrict `input` to `txt` format. Also refactor Cheetah code. * add nl * update dependencies * add output number test * add styler R script * fix test data * add citation and creators * Trying to fix grep basic regex * Update tools/text_processing/text_processing/macros.xml --------- Co-authored-by: Marie Jossé <[email protected]> Co-authored-by: Matthias Bernt <[email protected]> Co-authored-by: Nicola Soranzo <[email protected]> Co-authored-by: Pavankumar Videm <[email protected]>
- Loading branch information
1 parent
06dd963
commit 70980e3
Showing
26 changed files
with
254 additions
and
125 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env Rscript

# Check or apply styler formatting to R sources.
#
# Positional argument:
#   DIR    path to a single file or to a directory; a directory is
#          processed recursively.
# Option:
#   --dry  "on" (default): nothing is rewritten, and the script stops with
#          an error if any file is reported as changed.
#          "off": styler is run with dry = "off" and the number of changed
#          files is printed.

library("argparse")
library("styler")

parser <- ArgumentParser(description = "Call styler")
parser$add_argument("dir",
    metavar = "DIR", type = "character",
    help = "File or directory to style"
)
parser$add_argument("--dry",
    choices = c("off", "on"), default = "on"
)
args <- parser$parse_args()

# file.info() reports NA for a path that does not exist, which would make the
# directory test below fail with an unhelpful "argument is not interpretable
# as logical" error. Fail early with a clear message instead.
if (!file.exists(args$dir)) {
    stop("Path does not exist: ", args$dir, call. = FALSE)
}

# Anything styler prints while running is captured and discarded; only the
# summary below is reported.
invisible(capture.output({
    result <- if (dir.exists(args$dir)) {
        style_dir(args$dir, indent_by = 4, dry = args$dry, recursive = TRUE)
    } else {
        style_file(args$dir, indent_by = 4, dry = args$dry)
    }
}))

# Count the rows flagged as changed; NA entries are not counted.
n <- sum(result$changed, na.rm = TRUE)
if (n > 0) {
    if (args$dry == "off") {
        cat("Changed", n, "files\n")
    } else {
        stop("Linting failed for ", n, " files", call. = FALSE)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
<tool id="tp_awk_tool" name="Text reformatting" version="@[email protected]"> | ||
<tool id="tp_awk_tool" name="Text reformatting" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>with awk</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<requirements> | ||
<requirement type="package" version="4.2.0">gawk</requirement> | ||
<requirement type="package" version="5.3.0">gawk</requirement> | ||
</requirements> | ||
<version_command>awk --version | head -n 1</version_command> | ||
<command> | ||
|
@@ -15,9 +16,9 @@ | |
-v FS=' ' | ||
-v OFS=' ' | ||
--re-interval | ||
-f "$awk_script" | ||
"$infile" | ||
> "$outfile" | ||
-f '$awk_script' | ||
'$infile' | ||
> '$outfile' | ||
]]> | ||
</command> | ||
<configfiles> | ||
|
@@ -119,7 +120,6 @@ The select tool searches the data for lines containing or not containing a match | |
- **$** matches the end of a line or string. | ||
- **\|** Separates alternate possibilities. | ||
@REFERENCES@ | ||
]]> | ||
</help> | ||
<expand macro="citations" /> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
<tool id="tp_cut_tool" name="Advanced Cut" version="@[email protected]"> | ||
<tool id="tp_cut_tool" name="Advanced Cut" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>columns from a table (cut)</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
|
@@ -8,14 +8,18 @@ | |
<command> | ||
<![CDATA[ | ||
cut | ||
#if str($delimiter) != '': | ||
-d"${delimiter}" | ||
$complement | ||
$cut_type_options.cut_element | ||
#if $cut_type_options.cut_element != '-f' | ||
'$cut_type_options.list' | ||
#else | ||
#if str($cut_type_options.delimiter) != '': | ||
-d"$cut_type_options.delimiter" | ||
#end if | ||
'$cut_type_options.colnames_option.list' | ||
#end if | ||
${complement} | ||
${cut_type_options.cut_element} | ||
'${cut_type_options.list}' | ||
'${input}' | ||
> '${output}' | ||
'$input' | ||
> '$output' | ||
]]> | ||
</command> | ||
<inputs> | ||
|
@@ -24,23 +28,33 @@ | |
<option value="">Keep</option> | ||
<option value="--complement">Discard</option> | ||
</param> | ||
<param name="delimiter" type="select" label="Delimited by"> | ||
<option value="">Tab</option> | ||
<option value=" ">Whitespace</option> | ||
<option value=".">Dot</option> | ||
<option value=",">Comma</option> | ||
<option value="-">Dash</option> | ||
<option value="_">Underscore</option> | ||
<option value="|">Pipe</option> | ||
</param> | ||
<conditional name="cut_type_options"> | ||
<param name="cut_element" type="select" label="Cut by"> | ||
<option value="-f">fields</option> | ||
<option value="-c">characters</option> | ||
<option value="-b">bytes</option> | ||
</param> | ||
<when value="-f"> | ||
<param name="list" type="data_column" data_ref="input" multiple="true" label="List of Fields" help="(-f)" /> | ||
<param name="delimiter" type="select" label="Delimited by"> | ||
<option value="">Tab</option> | ||
<option value=" ">Whitespace</option> | ||
<option value=".">Dot</option> | ||
<option value=",">Comma</option> | ||
<option value="-">Dash</option> | ||
<option value="_">Underscore</option> | ||
<option value="|">Pipe</option> | ||
</param> | ||
<conditional name="colnames_option"> | ||
<param name="header" type="select" label="Is there a header for the data's columns ?"> | ||
<option value="Y">Yes</option> | ||
<option value="N" selected="true">No</option> | ||
</param> | ||
<when value="Y"> | ||
<param name="list" type="data_column" data_ref="input" use_header_names="true" multiple="true" label="List of Fields" help="(-f)" /> | ||
</when> | ||
<when value="N"> | ||
<param name="list" type="data_column" data_ref="input" multiple="true" label="List of Fields" help="(-f)" /> | ||
</when> | ||
</conditional> | ||
</when> | ||
<when value="-c"> | ||
<param name="list" type="text" value="" label="List of characters" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> | ||
|
@@ -51,15 +65,6 @@ | |
</sanitizer> | ||
</param> | ||
</when> | ||
<when value="-b"> | ||
<param name="list" type="text" value="" label="List of Bytes" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> | ||
<sanitizer> | ||
<valid initial="string.printable"> | ||
<remove value="'"/> | ||
</valid> | ||
</sanitizer> | ||
</param> | ||
</when> | ||
</conditional> | ||
</inputs> | ||
<outputs> | ||
|
@@ -70,7 +75,7 @@ | |
<conditional name="cut_type_options.cut_element"> | ||
<!-- fields --> | ||
<when value="-f"> | ||
<conditional name="delimiter"> | ||
<conditional name="cut_type_options.delimiter"> | ||
<when value="T"> | ||
<conditional name="input"> | ||
<when datatype_isinstance="interval"> | ||
|
@@ -184,20 +189,26 @@ | |
</data> | ||
</outputs> | ||
<tests> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<param name="input" value="cut1.txt"/> | ||
<param name="list" value="1,3,4"/> | ||
<param name="delimiter" value=""/> | ||
<output name="output" file="cut_results1.txt"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<param name="input" value="cut1.txt"/> | ||
<param name="list" value="2" /> | ||
<param name="delimiter" value="" /> | ||
<conditional name="cut_type_options"> | ||
<param name="delimiter" value="" /> | ||
<conditional name="colnames_option"> | ||
<param name="header" value="Y"/> | ||
<!-- in the test we can (apparently) not select by header name --> | ||
<param name="list" value="2" /> | ||
</conditional> | ||
</conditional> | ||
<param name="complement" value="--complement" /> | ||
<output name="output" file="cut_results2.txt"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<param name="input" value="cut1.txt"/> | ||
<param name="list" value="-3" /> | ||
<param name="delimiter" value="" /> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,18 @@ | ||
<tool id="tp_easyjoin_tool" name="Join" version="@[email protected]"> | ||
<tool id="tp_easyjoin_tool" name="Join" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>two files</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<expand macro="requirements"> | ||
<requirement type="package" version="5.22.0.1">perl</requirement> | ||
<requirement type="package" version="5.32">perl</requirement> | ||
</expand> | ||
<version_command>join --version | head -n 1</version_command> | ||
<command> | ||
<![CDATA[ | ||
cp $__tool_directory__/sort-header ./ && | ||
cp '$__tool_directory__/sort-header' ./ && | ||
chmod +x sort-header && | ||
perl $__tool_directory__/easyjoin | ||
perl '$__tool_directory__/easyjoin' | ||
$jointype | ||
-t $'\t' | ||
$header | ||
|
@@ -20,8 +21,8 @@ | |
$ignore_case | ||
-1 '$column1' | ||
-2 '$column2' | ||
"$infile1" | ||
"$infile2" | ||
'$infile1' | ||
'$infile2' | ||
> '$output' | ||
]]> | ||
</command> | ||
|
@@ -109,7 +110,6 @@ This tool joins two tabular files based on a common key column. | |
* The header line (**Fruit Color Price**) was joined and kept as first line. | ||
* Missing values ( Avocado's color, missing from the first file ) are replaced with a period character. | ||
@REFERENCES@ | ||
]]> | ||
</help> | ||
<expand macro="citations" /> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
<tool id="tp_find_and_replace" name="Replace" version="@[email protected]"> | ||
<tool id="tp_find_and_replace" name="Replace" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>parts of text</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<requirements> | ||
<requirement type="package" version="5.22.0.1">perl</requirement> | ||
<requirement type="package" version="5.32">perl</requirement> | ||
</requirements> | ||
<command> | ||
<![CDATA[ | ||
|
@@ -240,7 +241,6 @@ The Find & Replace tool searches the data for lines containing or not containing | |
- **\\w** matches a single letter or digit or an underscore. | ||
- **\\s** matches a single white-space (space or tabs). | ||
@REFERENCES@ | ||
]]> | ||
</help> | ||
<expand macro="citations" /> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,12 @@ | ||
<tool id="tp_grep_tool" name="Search in textfiles" version="@[email protected]"> | ||
<tool id="tp_grep_tool" name="Search in textfiles" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>(grep)</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<requirements> | ||
<requirement type="package" version="2.14">grep</requirement> | ||
<requirement type="package" version="4.4">sed</requirement><!-- for ansi2html.sh --> | ||
<requirement type="package" version="3.11">grep</requirement> | ||
<requirement type="package" version="4.8">sed</requirement><!-- for ansi2html.sh --> | ||
</requirements> | ||
<version_command>grep --version | head -n 1</version_command> | ||
<command> | ||
|
@@ -19,17 +20,17 @@ | |
-B $lines_before | ||
$invert | ||
$case_sensitive | ||
-- "${url_paste}" | ||
'${infile}' | $__tool_directory__/ansi2html.sh > "${output}" | ||
-- '${url_paste}' | ||
'${infile}' | '$__tool_directory__/ansi2html.sh' > '${output}' | ||
#else: | ||
grep | ||
$regex_type | ||
-A $lines_after | ||
-B $lines_before | ||
$invert | ||
$case_sensitive | ||
-- "${url_paste}" | ||
'${infile}' | grep -v "^--$" > "${output}" | ||
-- '${url_paste}' | ||
'${infile}' | grep -v "^--$" > '${output}' | ||
#end if | ||
]]> | ||
</command> | ||
|
@@ -42,9 +43,9 @@ | |
</param> | ||
|
||
<param name="regex_type" type="select" label="Type of regex"> | ||
<option value="-G">Basic</option> | ||
<option value="-P" selected="true">Perl</option> | ||
<option value="-E">Extended (egrep)</option> | ||
<option value="-G">Basic (-G)</option> | ||
<option value="-P" selected="true">Perl (-P)</option> | ||
<option value="-E">Extended (egrep -E)</option> | ||
</param> | ||
|
||
<param name="url_paste" type="text" label="Regular Expression" help="See below for more details"> | ||
|
@@ -113,16 +114,16 @@ | |
<param name="color" value="NOCOLOR" /> | ||
<output name="output" file="egrep_results1.txt" /> | ||
</test> | ||
<test><!-- same regex as egrep test, but different outcome with basic regex --> | ||
<test><!-- tests basic regex; + must be backslashed to match --> | ||
<param name="infile" value="egrep1.txt" /> | ||
<param name="case_sensitive" value="case sensitive" /> | ||
<param name="regex_type" value="-G" /> | ||
<param name="invert" value="" /> | ||
<param name="url_paste" value="[^ ]+" /> | ||
<param name="url_paste" value="[^ ]\+" /> | ||
<param name="lines_before" value="0" /> | ||
<param name="lines_after" value="0" /> | ||
<param name="color" value="NOCOLOR" /> | ||
<output name="output" file="egrep_results2.txt" /> | ||
<output name="output" file="egrep_results1.txt" /> | ||
</test> | ||
</tests> | ||
<help> | ||
|
@@ -187,7 +188,6 @@ The select tool searches the data for lines containing or not containing a match | |
- **$** matches the end of a line or string. | ||
- **\|** Separates alternate possibilities. | ||
@REFERENCES@ | ||
]]> | ||
</help> | ||
<expand macro="citations" /> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
<tool id="tp_head_tool" name="Select first" version="@[email protected]"> | ||
<tool id="tp_head_tool" name="Select first" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>lines from a dataset (head)</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<expand macro="requirements" /> | ||
<version_command>head --version | head -n 1</version_command> | ||
<command> | ||
|
@@ -56,7 +57,6 @@ will produce:: | |
chr7 56632 56652 D17003_CTCF_R6 310 + | ||
chr7 56736 56756 D17003_CTCF_R7 354 + | ||
@REFERENCES@ | ||
]]> | ||
</help> | ||
<expand macro="citations" /> | ||
|
Oops, something went wrong.