-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapply-model-to-real-data.xml
88 lines (88 loc) · 5.44 KB
/
apply-model-to-real-data.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.015">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="5.3.015" expanded="true" height="60" name="Retrieve" width="90" x="45" y="75">
<parameter key="repository_entry" value="//Local Repository/processes/new-15"/>
</operator>
<operator activated="true" class="retrieve" compatibility="5.3.015" expanded="true" height="60" name="Retrieve (2)" width="90" x="45" y="210">
<parameter key="repository_entry" value="//Local Repository/new-15"/>
</operator>
<operator activated="true" class="text:process_document_from_file" compatibility="5.3.002" expanded="true" height="76" name="Process Documents from Files" width="90" x="246" y="210">
<list key="text_directories">
<parameter key="unlabeled" value="C:\Users\jim\Desktop\test"/>
</list>
<parameter key="file_pattern" value="*"/>
<parameter key="extract_text_only" value="true"/>
<parameter key="use_file_extension_as_type" value="true"/>
<parameter key="content_type" value="txt"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" height="60" name="Tokenize" width="90" x="112" y="165">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:filter_by_length" compatibility="5.3.002" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="313" y="165">
<parameter key="min_chars" value="2"/>
<parameter key="max_chars" value="25"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="5.3.002" expanded="true" height="60" name="Transform Cases" width="90" x="514" y="165">
<parameter key="transform_to" value="lower case"/>
</operator>
<connect from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
<connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="apply_model" compatibility="5.3.015" expanded="true" height="76" name="Apply Model" width="90" x="380" y="75">
<list key="application_parameters"/>
<parameter key="create_view" value="true"/>
</operator>
<operator activated="true" class="write_special" compatibility="5.3.015" expanded="true" height="60" name="Write Special Format" width="90" x="514" y="300">
<parameter key="example_set_file" value="C:\Users\jim\Desktop\out-15-new.txt"/>
<parameter key="special_format" value="$v[metadata_file] $t $l $t $p $t $d"/>
<parameter key="fraction_digits" value="-1"/>
<parameter key="add_line_separator" value="true"/>
<parameter key="quote_nominal_values" value="true"/>
<parameter key="zipped" value="false"/>
<parameter key="overwrite_mode" value="overwrite first, append then"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
<connect from_op="Retrieve" from_port="output" to_op="Apply Model" to_port="model"/>
<connect from_op="Retrieve (2)" from_port="output" to_op="Process Documents from Files" to_port="word list"/>
<connect from_op="Process Documents from Files" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Write Special Format" to_port="input"/>
<connect from_op="Write Special Format" from_port="through" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>