-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathdiscovery.html
7432 lines (6738 loc) · 291 KB
/
discovery.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<title>DDI-RDF Discovery Vocabulary</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<style type="text/css">
/* Document-wide styles: bordered tables, class/property definition lists, figure captions. */
/* Tables: collapsed 1px black borders. */
table { border: black 1px solid; border-collapse:collapse; border-spacing: 0; }
/* Table cells: centered text, top-aligned, 10px horizontal padding (overrides the 3px shorthand left/right). */
table td { border: black 1px solid; padding: 3px; padding-left: 10px; padding-right: 10px; text-align: center; vertical-align: top;}
/* Indentation for definition lists carrying the classDefinitions / classProperties classes. */
.classDefinitions dd{margin-left: 25px;}
.classProperties{margin-left:25px;margin-bottom: 15px;margin-top:15px;}
.classProperties dt{margin-top: 15px;}
/* Space above elements carrying the "figcaption" class (a class selector, not the figcaption element). */
.figcaption {
margin-top: 0.75em;
}
/* Screen-only rules. */
@media screen
{
/* Elements with this class are not rendered on screen. */
.screen-hidden {
display: none;
}
/* Fixed relative width for the element with this id; the same rule is repeated for print below. */
#combined-uml-diagram-object-properties-only {
width: 88%;
}
}
/* Print-only rules. */
@media print
{
/* All images are set to 95% width when printing. */
img {
width: 95%;
}
/* The id selector is more specific than the img rule above, so this element prints at 88%. */
#combined-uml-diagram-object-properties-only {
width: 88%;
}
/* Elements with this class are not rendered in print. */
.print-hidden {
display: none;
}
/* NOTE(review): this selects the CSS class "table", not the table element - confirm that is intended. */
.table {
width: 100%;
}
/*
Print rendering notes from the original author:
IE: OK
Firefox: 11.3.3 "Data Sets, Data Files, and Descriptive Statistics" too wide.
*/
}
</style>
<script type="text/javascript">
//<![CDATA[
// Settings for adjustWidthOfImages(): UML diagrams are displayed at 55% of their original width,
// or at 90% of the screen width if the scaled diagram would be wider than that.
// UML diagrams must have the CSS class "uml-diagram" (no additional classes).
// Upper bound for a diagram's displayed width in pixels: 90% of screen.width
// (the width of the screen, not of the browser window).
var maxWidth = screen.width*0.9;
// Factor applied to each diagram's natural (intrinsic) width.
var scaleFactor = 0.55;
/**
 * Sets the displayed width of every UML diagram image in the document.
 *
 * An image is treated as a UML diagram when its class attribute is exactly
 * "uml-diagram" (compared case-insensitively; images carrying additional
 * classes are deliberately left alone). Each such image is given an inline
 * style width of its natural width multiplied by the global scaleFactor,
 * capped at the global maxWidth.
 *
 * Images whose natural width is not a positive number (not loaded, broken,
 * or a browser without naturalWidth) are skipped, so they never receive a
 * "0px" or "NaNpx" width.
 *
 * Reads the globals maxWidth and scaleFactor; takes no arguments and
 * returns nothing.
 */
function adjustWidthOfImages() {
  var images = document.getElementsByTagName( 'img' );
  for ( var i = 0; i < images.length; i++ ) {
    var image = images[i];
    // Exact match on the whole class attribute: works only if this single CSS class name exists.
    if ( image.className.toLowerCase() != 'uml-diagram' ) {
      continue;
    }
    var naturalWidth = image.naturalWidth;
    // Written as a negated "> 0" test so that undefined and NaN are rejected as well as 0.
    if ( !( naturalWidth > 0 ) ) {
      continue;
    }
    // Declared locally; the previous implementation assigned an undeclared variable here.
    var elementWidth = Math.min( Math.round( naturalWidth * scaleFactor ), maxWidth );
    image.style.width = elementWidth + 'px';
  }
}
//]]></script>
<script type="text/javascript" src='https://www.w3.org/Tools/respec/respec-w3c-common' class='remove'></script>
<script type="text/javascript" class='remove'>
// ReSpec configuration for this document (see the ReSpec wiki link below for option details).
var respecConfig = {
// specification status (e.g. WD, LCWD, NOTE, etc.). If in doubt use ED.
// Was previously "unofficial"; see https://github.com/w3c/respec/wiki/specStatus
specStatus: "base",
// the specification's short name, as in http://www.w3.org/TR/short-name/
shortName: "Disco",
// if your specification has a subtitle that goes below the main
// formal title, define it here
subtitle : "A vocabulary for publishing metadata about data sets (research and survey data) into the Web of Linked Data",
// if you wish the publication date to be other than today, set this
// publishDate: "2009-08-06",
// if the specification's copyright date is a range of years, specify
// the start date here:
copyrightStart: "2019",
// if there is a previously published draft, uncomment this and set its YYYY-MM-DD date
// and its maturity status
// previousPublishDate: "1977-03-15",
// previousMaturity: "WD",
// if there is a publicly available Editor's Draft, this is the link
// edDraftURI: "https://raw.github.com/linked-statistics/disco-spec/master/discovery.html",
// if this is a LCWD, uncomment and set the end of its review period
// lcEnd: "2009-08-05",
// editors, add as many as you like
// only "name" is required
editors: [
{ name: "Thomas Hartmann (formerly Bosch)", url: "http://www.dr-thomashartmann.de/"},
{ name: "Richard Cyganiak", url: "https://www.linkedin.com/in/cygri",
company: "TopQuadrant", companyURL: "http://www.topquadrant.com/" },
// NOTE(review): the "[email protected]" fragment in the next URL looks like an e-mail obfuscation artifact - verify the URL.
{ name: "Joachim Wackerow", url: "https://www.gesis.org/institut/mitarbeiterverzeichnis/person/?tx_gextstaffdir_staffdirectory%5bemail%[email protected]",
company: "GESIS - Leibniz Institute for the Social Sciences, Germany", companyURL: "http://www.gesis.org/" },
// NOTE(review): same "[email protected]" fragment as above - verify the URL.
{ name: "Benjamin Zapilko", url: "https://www.gesis.org/institut/mitarbeiterverzeichnis/person/?tx_gextstaffdir_staffdirectory%5bemail%[email protected]",
company: "GESIS - Leibniz Institute for the Social Sciences, Germany", companyURL: "http://www.gesis.org/" },
],
// authors, add as many as you like.
// This list is optional; it is used here because the document has authors as well as editors.
// only "name" is required. Same format as editors.
authors: [
{ name: "Thomas Hartmann (formerly Bosch)", url: "http://www.dr-thomashartmann.de/"},
{ name: "Sarven Capadisli", url: "http://csarven.ca/",
company: "Enterprise Information Systems (EIS)", companyURL: "http://eis.iai.uni-bonn.de/" },
{ name: "Franck Cotton", //url: "http://example.org/",
company: "INSEE - Institut National de la Statistique et des Études Économiques, France", companyURL: "http://www.insee.fr/en/default.asp" },
{ name: "Richard Cyganiak", url: "https://www.linkedin.com/in/cygri",
company: "TopQuadrant", companyURL: "http://www.topquadrant.com/" },
{ name: "Arofan Gregory", //url: "http://example.org/",
company: "ODaF - Open Data Foundation, USA", companyURL: "http://www.opendatafoundation.org/" },
{ name: "Benedikt Kämpgen", url: "http://www.aifb.kit.edu/web/Benedikt_K%C3%A4mpgen/en",
company: "Institute of Applied Informatics and Formal Description Methods (AIFB), Germany", companyURL: "http://www.aifb.kit.edu/web/Hauptseite/en" },
{ name: "Olof Olsson", url: "https://www.gu.se/english/about_the_university/staff/?languageId=100001&userId=xoolof&departmentId=169030",
company: "SND - Swedish National Data Service", companyURL: "http://snd.gu.se/en" },
{ name: "Heiko Paulheim", url: "http://www.heikopaulheim.com/",
company: "Data and Web Science Group, University of Mannheim, Germany", companyURL: "http://dws.informatik.uni-mannheim.de/" },
// NOTE(review): the two GESIS staff URLs below differ from the ones used for the same people in "editors" - confirm which is current.
{ name: "Joachim Wackerow", url: "http://www.gesis.org/en/institute/staff/?alpha=W&name=joachim%2Cwackerow",
company: "GESIS - Leibniz Institute for the Social Sciences, Germany", companyURL: "http://www.gesis.org/" },
{ name: "Benjamin Zapilko", url: "http://www.gesis.org/en/institute/staff/?alpha=Z&name=benjamin%2Czapilko",
company: "GESIS - Leibniz Institute for the Social Sciences, Germany", companyURL: "http://www.gesis.org/" },
],
// If specified, defines an array of alternate formats in which the document is available (e.g., XML, Postscript). The format of the array is:
// key description
// uri the relative or absolute URI to the alternate version.
// label the label to use for the version.
alternateFormats: [
{ uri: "DDI-RDF_Discovery_Vocabulary.pdf",
label: "PDF" }
],
// generates various RDFa attributes if set to true
// set to false for test purposes
doRDFa: false,
// name of the WG
// Apparently, not used.
wg: "DDI Alliance RDF Vocabularies Working Group",
// URI of the public WG page
// Apparently, not used.
wgURI: "http://www.ddialliance.org/community/working-groups",
// name (without the @w3c.org) of the public mailing list to which comments are due
// Apparently, this has to be a W3C address.
// Apparently, not used.
// NOTE(review): the value "[email protected]" looks like an e-mail obfuscation artifact - verify the intended list name.
wgPublicList: "[email protected]",
// URI of the patent status for this WG, for Rec-track documents
// !!!! IMPORTANT !!!!
// This is important for Rec-track documents, do not copy a patent URI from a random
// document unless you know what you're doing. If in doubt ask your friendly neighbourhood
// Team Contact.
// NOTE(review): this is set to a Creative Commons license URL rather than a patent policy page - presumably deliberate; confirm.
wgPatentURI: "http://creativecommons.org/licenses/by/4.0/",
// deepest heading level listed in the generated table of contents
// NOTE(review): given as the string "2"; confirm whether ReSpec expects a number here.
maxTocLevel: "2",
};
</script>
</head>
<body onload="adjustWidthOfImages()">
<section id='abstract'>
This specification defines the DDI-RDF Discovery Vocabulary (Disco), an RDF Schema vocabulary that enables discovery of research and survey data on the Web. It is based on <a href="http://www.ddialliance.org/what">DDI (Data Documentation Initiative)</a> XML formats.
</section>
<section id="sotd">
<p>
The DDI-RDF Discovery Vocabulary is a draft specification of the DDI Alliance.
</p>
<p>This specification is produced by the subgroup on Disco (chair Joachim Wackerow) of the
<a href="http://www.ddialliance.org/alliance/working-groups#RDF">RDF Vocabularies Working Group</a>,
a working group at the <a href="http://www.ddialliance.org/alliance">DDI Alliance</a>.</p>
<p>Resources:</p>
<ul>
<li><a href="http://www.ddialliance.org/Specification/RDF/Discovery">Webpage at DDI Alliance</a></li>
<li><a href="https://groups.google.com/group/ddi-rdf-vocabulary">Google Group</a></li>
<li><a href="https://github.com/linked-statistics/disco-spec/issues">Issue tracker</a></li>
<li><a href="https://github.com/linked-statistics/disco-spec/">GitHub repository</a></li>
</ul>
</section>
<section id="tof">
<!-- placeholder for generated table of diagrams -->
</section>
<section>
<h2>Introduction</h2>
<p>
The namespace for all terms in this ontology is: <code>http://rdf-vocabulary.ddialliance.org/discovery#</code>.
</p>
<p>
Normative formats of the DDI-RDF Discovery Vocabulary specification are:
</p>
<ul>
<li>this HTML specification, and</li>
<li>the <a href="https://raw.github.com/linked-statistics/disco-spec/master/discovery.ttl">Turtle</a> file.</li>
</ul>
<p>
There is also a <a href="https://raw.github.com/linked-statistics/disco-spec/master/discovery.xml">non-canonical RDF/XML version</a> of the Turtle file.
</p>
<p>
Open issues are discussed on the issue tracker: <a href="https://github.com/linked-statistics/disco-spec/issues">open issues</a>.
</p>
<p>
A detailed overview of the Disco vocabulary is available as <a href="http://www.essepuntato.it/lode/owlapi/https://raw.github.com/linked-statistics/disco-spec/master/discovery.ttl">LODE view</a>
or a <a href="http://vowl.visualdataweb.org/webvowl/index.html#iri=http://rdf-vocabulary.ddialliance.org/discovery.ttl">web view</a> using the web application <a href="http://vowl.visualdataweb.org/webvowl.html">Web-based Visualization of Ontologies</a>.
</p>
<p>
For a detailed explanation of DDI terms please refer to <a href="#overview">section 2</a>.
</p>
<section>
<h3>Scope and Purpose</h3>
<p>This specification is designed to support the discovery of
microdata sets and related metadata using RDF technologies in the
Web of Linked Data. Many archives and other organizations have
large amounts of data, sometimes publicly available, but often
confidential in nature, requiring applications for access. Many
such organizations use the
<a href="http://www.ddialliance.org/what">Data Documentation Initiative</a>
standard, which is a proven and highly detailed XML metadata
format for describing rectangular data sets of this type.
This vocabulary makes use of the DDI specification to create a
simplified version of this model for the discovery of data files.</p>
<p>The data holdings of data archives are often collected by
researchers, and only afterwards disseminated by archives. Other
data-producing organizations such as research centers and
statistical agencies are also increasingly interested in the
DDI standards for documenting their own microdata. In general
terms, most DDI metadata describes data sets for the social,
behavioural, and economic sciences. This data is fairly
consistent in format, consisting of rectangular data files with
columns containing variables for a set of cases, contained in
the rows. It is often collected by survey, although in some
cases may come from administrative sources, sensors, or
registers.</p>
<p>This vocabulary is intended not only for use by the research
data community, but also by any others needing an RDF vocabulary
for describing this type of rectangular data. This vocabulary
will provide a useful model for describing some of the data sets
now being published by open government initiatives, by providing
a rich metadata structure for them. While the data sets may be
available (typically as CSV files) the metadata which
accompanies them is not necessarily coherent, making the
discovery of these data sets difficult. This vocabulary would
help to overcome this difficulty by allowing for the creation of
standard queries to programmatically identify data sets, whether
made available by government or held within a data archive.</p>
<p>
Disco could be used to discover datasets by searching for specific questions, topics, and geographical coverage.
Depending on the complexity of the search or of the data portal, parts of Disco could be used, the complete Disco, or Disco together with related vocabularies.
The document [<cite><a class="bibref" href="#bib-Scenarios">Scenarios</a></cite>] by Vompras, Gregory, Bosch, Capadisli, and Wackerow describes typical use cases for the applicability of the DDI-RDF Discovery vocabulary. In the Section <a class="bibref" href="#use-cases-and-example-queries">Use Cases and Example Queries</a> of the Appendix additional discovery use cases are illustrated by several SPARQL queries.</p>
</section>
<p>
Statistical domain experts (core members of the DDI Alliance Technical Implementation Committee,
representatives of national statistical institutes, national data archives) and Linked Open Data community
members have selected the DDI elements which are seen as most important to solve problems associated with
use cases in the area of data discovery. Section 2 gives an overview of the conceptual model.
More detailed descriptions of all the properties are given in the specification and two conference papers [<cite><a class="bibref" href="#bib-Linked-Statistical-Data">Linked-Statistical-Data</a></cite>] [<cite><a class="bibref" href="#bib-DDI-RDF-Discovery-Vocabulary">DDI-RDF-Discovery-Vocabulary</a></cite>].
Disco is intended to provide means to describe microdata by essential metadata for the discovery purpose.
Existing DDI-XML instances can be transformed into this RDF format and therefore exposed as Linked Data.
The reverse transformation is not intended, as we have defined Disco components and reused components of other
RDF vocabularies which only make sense in the Linked Data field.
</p>
<section>
<h3>About DDI</h3>
<p>The Data Documentation Initiative standards are produced and
maintained by a member-based consortium of global scope, the
<a href="http://www.ddialliance.org/">DDI Alliance</a>. The Alliance is
currently housed at the
<a href="http://www.icpsr.umich.edu/">Inter-university Consortium
for Political and Social Research</a> (ICPSR) at the University
of Michigan and has more than 30 member
institutions. The standards have been under development for more
than ten years, and are in widespread use among data archives
and libraries, producers of research data, secure data centers,
and statistical agencies.</p>
<p>There are two major versions of DDI (both serialized in XML format): the
“<a href="http://www.ddialliance.org/Specification/DDI-Codebook/">Codebook</a>”
version, which allows for holding general information
about a study, along with its data dictionary; and the
“<a href="http://www.ddialliance.org/Specification/DDI-Lifecycle/">Lifecycle</a>”
version of DDI, which allows for the description of more complex
multi-wave studies, throughout the data lifecycle, from study
conception through data collection and processing.</p>
<p>This vocabulary contains a selection of the major types of metadata defined by these two versions in a highly simplified form, for the purposes of discovery. The XML
Codebook and Lifecycle versions of DDI are very broad: these
standards contain hundreds of metadata elements, providing
enough information to programmatically work with the data files
for such functions as the automatic creation of databases, and
transformations between statistical packages. DDI in both
versions is generally used to describe data found in ASCII
files, whether positional files with fixed-width fields or files
using a delimited format such as CSV.</p>
<p>It is difficult to claim that there is a single agreed
conceptual model for describing research data in the social,
behavioural, and economic sciences—there is a wide range of
models and terms. However, the issues faced in this area have
been the subject of discussion within the DDI community for many
years, and the DDI model represents the best consensus which
exists today. As such, it gives us a good basis for creating a
vocabulary which will be recognizable to researchers familiar
with this type of data.</p>
</section>
<section>
<h3>Relationship to Data Cube, DCAT and XKOS</h3>
<p>The Discovery Vocabulary (Disco) is aligned to several other metadata
vocabularies used in the RDF community. Disco is designed to be used in conjunction with other vocabularies.</p>
<p>The <a href="http://www.w3.org/TR/vocab-dcat/">Data Catalog
Vocabulary</a> (DCAT) is a W3C standard for describing catalogs
of datasets, and we map to it in two places:
Our <code><a href="#dfn-disco-logicaldataset" class="internalDFN">LogicalDataSet</a></code> is a subclass of DCAT’s Dataset, and our
<code><a href="#dfn-disco-datafile" class="internalDFN">DataFile</a></code> is a subclass of DCAT’s Distribution. DCAT makes few
assumptions about the kind of datasets being described,
and focuses on general metadata about the datasets
(mostly using Dublin Core), and on different ways of
distributing and accessing the dataset, including availability
of the dataset in multiple formats. Combining terms from both
DCAT and the Discovery Vocabulary can be useful for a number of
reasons:</p>
<ul>
<li>Describing collections (catalogs) of research datasets (DCAT)</li>
<li>Providing additional information about physical aspects (file size, file formats) of research data files (DCAT)</li>
<li>Providing information about the data collection that produced the datasets in a data catalog (Disco)</li>
<li>Providing information about the logical structure (variables, concepts, etc.) of tabular datasets in a data catalog (Disco)</li>
</ul>
<p>DCAT is richer for the description of collections and catalogs. Disco supports richer descriptions of groups of datasets or individual datasets. In this spec, some of our examples are partially based on DCAT (and we will indicate when this is the case).</p>
<p>The <a href="http://www.w3.org/TR/vocab-data-cube/">Data Cube
vocabulary</a> is a W3C standard for representing data cubes,
that is, multidimensional aggregate data. Data cubes are often
generated by tabulating or aggregating record-level datasets.
For example, if an observation in a census data cube indicates
the population of a certain age group in a certain region is
12345, then this fact was obtained by aggregating that number of
individual records from a record-level (or “microdata”) dataset.
The Discovery Vocabulary contains a property “aggregation” (pointing from a Disco data set to a Data Cube dataset) that
indicates that a Cube dataset was derived by tabulating a
record-level dataset.</p>
<p>Data Cube provides for the description of the structure of
such cubes, but also for the representation of the cube data
itself, that is, the observations that make up the cube dataset.
This is not the case for the Discovery Vocabulary, which
only describes the structure of a dataset, but is not concerned
with representing the actual data in it. The actual data is
assumed to sit in a data file (e.g., a CSV file, or in a
proprietary statistics package file format) that is not represented
in RDF.</p>
<p>
The interplay of Data Cube and Disco needs further exploration regarding the relationship of aggregate data,
aggregation methods, and the underlying microdata.
The goal would be to drill down to the related microdata based on a search resulting in aggregate data.
On the one hand aggregate data are often easily available and give a quick overview.
On the other hand microdata enable more detailed analyses.
</p>
<p>The use of formal statistical classifications is very common
in research data sets—these are treated in our vocabulary as
SKOS concepts, but in some cases those working with formal
statistical classifications may desire more expressive
capability than SKOS provides. To support such users, the DDI
Alliance also publishes
<a href="http://purl.org/linked-data/xkos">XKOS</a>, a
vocabulary which extends SKOS
to allow for a more complete description of such
classifications. While the use of XKOS is not required by this
vocabulary, the two are designed to work in complementary fashion.</p>
<p>More details on the relationship to Data Cube, DCAT and XKOS as well as to other vocabularies are provided in Section 9.</p>
</section>
</section>
<section>
<h2>Overview</h2>
<!-- <div class="figure">
<img class="uml-diagram" src="diagrams/overview.png" alt="Vocabulary Overview"/>
</div>
-->
<figure>
<img class="uml-diagram" src="diagrams/overview.png" alt="Vocabulary Overview"/>
<figcaption>Vocabulary Overview</figcaption>
</figure>
<p>To understand the DDI Discovery Vocabulary, there are a few
central classes, which can serve as entry points. The first of
these is the <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> class. A <b><code><a href="#dfn-disco-study" class="internalDFN">Study</a></code></b> in our model represents the
process by which a data set was generated or collected. Literal
properties include information about the funding, organizational
affiliation, abstract, title, version, and other such high-level
information. In some cases, where data collection is cyclic or
on-going, data sets may be released as a <b><code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code></b>, where each
cycle or "wave" of the data collection activity produces one or
more data sets. This is typical for longitudinal studies, panel
studies, and other types of "series" (to use the DDI term). In
this case, a number of <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> objects would be collected into a
single <code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>.</p>
<p>Data sets have two representations in our model: a logical
representation, which describes the contents of the data set,
and a physical representation, which is a distributed file
holding that data. It is possible to format data files in many
different ways, even if the logical content is the same. In our
model the <b><code><a href="#dfn-disco-logicaldataset" class="internalDFN">LogicalDataSet</a></code></b> represents the content of the file
(its organization into a set of variables (<code><a href="#dfn-disco-variable" class="internalDFN">Variable</a></code>)). The <code><a href="#dfn-disco-logicaldataset" class="internalDFN">LogicalDataSet</a></code>
is an extension of the <code>dcat:Dataset</code> class. Physical, distributed
files are represented by the class <b> <code><a href="#dfn-disco-datafile" class="internalDFN">DataFile</a></code></b> (not depicted in the diagram), which is itself an
extension of the <code>dcat:Distribution</code>.</p>
<p>When it comes to understanding the contents of the data set,
this is done using the <b><code><a href="#dfn-disco-variable" class="internalDFN">Variable</a></code></b> class. Variables (<code><a href="#dfn-disco-variable" class="internalDFN">Variable</a></code>) provide a
definition of the column in a rectangular data file, and can
associate it with a particular Concept, and a <b><code><a href="#dfn-disco-question" class="internalDFN">Question</a></code></b> (the <code><a href="#dfn-disco-question" class="internalDFN">Question</a></code> in the
<b><code><a href="#dfn-disco-questionnaire" class="internalDFN">Questionnaire</a></code></b> which was used to collect the data). Variables (<code><a href="#dfn-disco-variable" class="internalDFN">Variable</a></code>) are
related to a representation of some form, which may be a set of
codes and categories (a "codelist") or may be one of other
normal data types (dateTime, numeric, textual, etc.). Codes and
Categories are represented using SKOS concepts and concept
schemes.</p>
<p>Data is collected about a specific phenomenon, typically
involving some target population, and focusing on the analysis
of a particular type of subject. These are respectively
represented by the <b><code><a href="#dfn-disco-universe" class="internalDFN">Universe</a></code></b> class and the <b><code><a href="#dfn-disco-analysisunit" class="internalDFN">AnalysisUnit</a></code></b> class.
If, for example, the adult population of Finland is being
studied, the <code><a href="#dfn-disco-analysisunit" class="internalDFN">AnalysisUnit</a></code>
would be individuals or persons and the <b><code><a href="#dfn-disco-universe" class="internalDFN">Universe</a></code></b> would be the adult population of Finland.
Bosch, Cyganiak, Wackerow, and Zapilko give a detailed overview of
the DDI-RDF Discovery Vocabulary in a full paper written for the Dublin
Core conference [<cite><a class="bibref" href="#bib-Linked-Statistical-Data">Linked-Statistical-Data</a></cite>].
</p>
</section>
<section id='example'>
<h2>Real-life Example</h2>
<p>We have a sample of a survey which has been documented using
DDI XML—the 1980 Argentine National Population and Housing
Census. We are using for this example the version disseminated by <a href="http://www.ipums.org/">IPUMS</a>,
which provides internationally harmonized census data, to make
it more useful for cross-border research. Thus, this data set is
produced by two organizations: The Argentine National Institute
of Statistics and Censuses, and the Minnesota Population Center
hosted in the University of Minnesota.</p>
<p>To give some idea of what is contained in the metadata set,
we will use some screen shots from OpenMetadata Survey Catalog,
a portal which indexes the
DDI files to facilitate searching, and reflects the contents in
a fashion which is easy to view. Follow this
<a href="http://www.openmetadata.org/surveycatalog/index.php/catalog/996">link</a>
for the information about this DDI file at the OpenMetadata Survey Catalog.</p>
<figure>
<img src="images/example-overview.png"/>
<figcaption>Overview</figcaption>
</figure>
<p>Figure 2 shows us the overview page for this study, giving us
some basic information - title, identifier for the study, data producers, year, country, and
a link to the access policies. If we look at the right-hand
panel, we see an outline of the metadata contents of the file,
including information about the questionnaire used, sampling
methodology, and data collection activities, as well as the two data files which contain detailed information about its variables.</p>
<p>Not all of this information is useful in a data discovery
scenario—sampling and data collection methodologies are not
typically indexed for searches. Information about the
questionnaire is, as is detailed information about the variables
contained in the files. We will look more closely at the
metadata of primary interest for our discovery scenario.</p>
<p>Using RDF and the DDI Discovery Vocabulary, the study can also be described in triples:
an instance of type <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> is given the title and the identifier; also, the two
data producers are linked and further described.
The year and country are described in the form of a temporal and spatial coverage of the study.
Also, the topics of the study are represented.
The study instance further contains an abstract.
Since a study is a versionable object in DDI, we attach a version to it.
A study is further described using additional information which is described in the following Example 1.</p>
<pre class="example">
# We will use the namespace 'ddi' in all of our examples.
ddi:Study_1 a disco:Study;
dcterms:title "National Population and Housing Census, 1980"@en;
dcterms:identifier "ARG_1980_PHC_v01_A_IPUMS";
dcterms:creator [
rdfs:label "Minnesota Population Center"@en;
skos:notation "MPC";
org:memberOf [
rdfs:label "University of Minnesota"@en;
];
];
dcterms:creator [
rdfs:label "Argentine National Institute of Statistics and Censuses"@en;
];
dcterms:temporal [
a dcterms:PeriodOfTime ;
disco:startDate "1980-10-22"^^xsd:date;
disco:endDate "1980-10-22"^^xsd:date;
rdfs:comment "The interviews take place on the expected census day. In
some areas the enumeration took place the following day because of
access problems due to heavy rains.";
];
dcterms:spatial [
# This is the DC-strictly compatible way to do it
a dcterms:Location;
rdfs:label "Argentina, national coverage"@en;
];
# Only a subset of subjects mentioned in the original file
dcterms:subject [
skos:definition "Technical Variables -- HOUSEHOLD"@en ;
] ;
dcterms:subject [
skos:definition "Group Quarters Variables -- HOUSEHOLD"@en ;
] ;
dcterms:abstract "IPUMS-International is an effort to inventory, preserve,
harmonize, and disseminate census microdata from around the world. The
project has collected the world's largest archive of publicly available
census samples. The data are coded and documented consistently across
countries and over time to facilitate comparative research. IPUMS-
International makes these data available to qualified researchers free
of charge through a web dissemination system. The IPUMS project is a
collaboration of the Minnesota Population Center, National Statistical
Offices, and international data archives. Major funding is provided by
the U.S. National Science Foundation and the Demographic and Behavioral
Sciences Branch of the National Institute of Child Health and Human
Development. Additional support is provided by the University of
Minnesota Office of the Vice President for Research, the Minnesota
Population Center, and Sun Microsystems.";
owl:versionInfo "Version 1.0. This version contains selected variables from
the original census microdata plus harmonized variables from the IPUMS
International data base."@en;
disco:universe ddi:Universe_1;
disco:instrument ddi:Questionnaire_1;
disco:product ddi:Dataset_1;
disco:analysisUnit ddi:AnalysisUnit_1;
disco:kindOfData ddi:KindOfData_1;
# stdyInfo/notes currently not represented.
disco:variable ddi:AR80A401, ddi:AR80A402, ddi:AR80A404, ddi:AR80A407, ddi:AR80A411.
</pre>
<p>While the sampling methodology may not be of great interest
for those searching for data, one field within this section is:
the “universe”, that is, the population being studied. Figure 3
gives us an example of this information.</p>
<figure>
<img src="images/example-universe.png"/>
<figcaption>Coverage and Universe</figcaption>
</figure>
<p>Thus, the study refers to a specific universe.</p>
<pre class="example">
ddi:Universe_1 a disco:Universe;
skos:definition "All the population in the national territory at the moment the census is carried out."@en .
</pre>
<p>Using a type of instrument - a questionnaire -, the study produced a dataset. The dataset has access rights.
The dataset has a concrete data file (physical representation or distributed file) populated by certain variables.</p>
<pre class="example">
ddi:Dataset_1 a disco:LogicalDataSet;
disco:instrument ddi:Questionnaire_1;
dcterms:accessRights ddi:AccessRights_1;
disco:dataFile ddi:Datafile_1;
disco:variable ddi:AR80A401, ddi:AR80A402, ddi:AR80A404, ddi:AR80A407, ddi:AR80A411.
ddi:AccessRights_1 a dcterms:RightsStatement;
dcterms:description "IPUMS-International distributes
integrated microdata of individuals and households only by agreement ...
designed to extend this record.";
rdfs:seeAlso <http://microdata.worldbank.org/index.php/catalog/442/accesspolicy>.
</pre>
<p>Figure 4 shows us the information about access policies,
which typically is of interest to those searching for data.</p>
<figure>
<img src="images/example-access-policy.png"/>
<figcaption>Access Policy</figcaption>
</figure>
<p>The Unit of Analysis and Kind of Data further describe the study.</p>
<pre class="example">
ddi:AnalysisUnit_1 a disco:AnalysisUnit ;
skos:definition "Dwelling, quarter dwelling, census household, and population"@en .
ddi:KindOfData_1 a skos:Concept ;
rdfs:label "Census/enumeration data [cen]"@en .
</pre>
<p>In some cases we may have a lot of information about the
questionnaires used, and it is very common to search for data by
the text of the question used to collect it. Sometimes there
will be a PDF of a questionnaire, and sometimes question text
may be linked to individual variables within a file. In this
case, we have only a textual description of the set of forms
used in the census (Figure 5).</p>
<figure>
<img src="images/example-questionnaires.png"/>
<figcaption>Questionnaires</figcaption>
</figure>
<p>The following example illustrates three questions. Each question does have a text.</p>
<pre class="example">
ddi:Questionnaire_1 a disco:Questionnaire;
disco:question ddi:QuestionGender;
disco:question ddi:QuestionAge;
disco:question ddi:QuestionCitizenship.
ddi:QuestionGender a disco:Question;
disco:questionText "2. Is the person a man or a woman? [] Man, [] Woman"@en.
ddi:QuestionAge a disco:Question;
disco:questionText "3. What is his or her age? _ _ Mark the age in completed
years at the date of the census for those younger than one year old mark
00. For those younger than 10 years old, mark 01, 02, 03, etc. For those
older than 99 years old, mark 99."@en.
ddi:QuestionCitizenship a disco:Question;
disco:questionText "6. [Immigration status] Only for persons who have usual
residence in Argentina and were born in another country. [Questions 6A
and 6B asked only of persons born outside Argentina and who currently
reside in Argentina.] B. Are you a naturalized citizen of Argentina?
[] Yes [] No [] Unanswered"@en.
</pre>
<p>In Figure 6 we see the list of variables contained in the
data file. For each of these we will also have a detailed view,
showing the codes and categories used to encode the actual
responses in the variables (Figure 7).</p>
<figure>
<img src="images/example-variable-list.png"/>
<figcaption>Variables List</figcaption>
</figure>
<figure>
<img src="images/example-variable-detail.png"/>
<figcaption>Variable Details</figcaption>
</figure>
<p>Any variable has a text and is based on a variable definition.</p>
<div class="note">
<p>
Please note that the turtle example describes the variable labels from the screenshot above and references to the related represented variable and question.
</p>
</div>
<pre class="example">
ddi:AR80A401 a disco:Variable;
dcterms:identifier "AR80A401";
skos:prefLabel "Sex"@en, "Sexe"@fr;
dcterms:description "This variable indicates the person's gender."@en;
disco:basedOn ddi:SexVD;
disco:question ddi:QuestionGender.
ddi:AR80A402 a disco:Variable;
dcterms:identifier "AR80A402";
dcterms:description "This variable indicates the person's age in years."@en;
skos:prefLabel "Age"@en, "Âge"@fr;
disco:basedOn ddi:AgeVD;
disco:question ddi:QuestionAge.
ddi:AR80A407 a disco:Variable;
dcterms:identifier "AR80A407";
dcterms:description "This variable indicates whether or not the person is
a naturalized citizen of Argentina."@en;
skos:prefLabel "Citizenship"@en, "Citoyenneté"@fr;
disco:basedOn ddi:CitizenshipVD;
disco:question ddi:QuestionCitizenship.
</pre>
<p>Any variable definition has a representation defining the possible values of a variable.
Also, a variable definition has its own universe (may be the same as the study or possibly narrower) and (DDI) concepts further describing the variable.</p>
<pre class="example">
ddi:SexVD a disco:RepresentedVariable;
disco:universe ddi:UniversePerson;
disco:representation ddi:SexRepr;
disco:concept ddi:IpumsC1;
skos:prefLabel "Sex"@en, "Sexe"@fr;
dcterms:description "Sex data element"@en.
ddi:SexRepr a skos:ConceptScheme, disco:Representation;
skos:hasTopConcept ddi:SexM, ddi:SexF.
ddi:SexM a skos:Concept;
skos:notation "1";
skos:prefLabel "Male"@en, "Homme"@fr;
skos:inScheme ddi:SexRepr.
ddi:SexF a skos:Concept;
skos:notation "2";
skos:prefLabel "Female"@en, "Femme"@fr;
skos:inScheme ddi:SexRepr.
ddi:AgeVD a disco:RepresentedVariable;
disco:universe ddi:UniversePerson;
disco:representation ddi:AgeRepr;
disco:concept ddi:IpumsC1;
skos:prefLabel "Age"@en, "Âge"@fr;
dcterms:description "Age data element"@en.
ddi:AgeRepr a skos:ConceptScheme, disco:Representation;
skos:hasTopConcept ddi:Age0, ddi:Age1, ddi:Age99.
ddi:Age0 a skos:Concept;
skos:notation "0";
skos:prefLabel "0";
skos:inScheme ddi:AgeRepr.
ddi:Age1 a skos:Concept;
skos:notation "1";
skos:prefLabel "1";
skos:inScheme ddi:AgeRepr.
# ...
ddi:Age99 a skos:Concept;
skos:notation "99";
skos:prefLabel "99";
skos:inScheme ddi:AgeRepr.
ddi:CitizenshipVD a disco:RepresentedVariable;
disco:universe ddi:UniverseNonArgentines;
disco:representation ddi:CitizenshipRepr;
disco:concept ddi:IpumsC2;
skos:prefLabel "Citizenship"@en;
dcterms:description "Citizenship data element"@en.
ddi:CitizenshipRepr a skos:ConceptScheme, disco:Representation;
skos:hasTopConcept ddi:CYes, ddi:CNo, ddi:CUnknown, ddi:CNIU.
ddi:CYes a skos:Concept;
skos:notation "1";
skos:prefLabel "Yes";
skos:inScheme ddi:CitizenshipRepr.
ddi:CNo a skos:Concept;
skos:notation "2";
skos:prefLabel "No";
skos:inScheme ddi:CitizenshipRepr.
ddi:CUnknown a skos:Concept;
skos:notation "8";
skos:prefLabel "Unknown";
skos:inScheme ddi:CitizenshipRepr.
ddi:CNIU a skos:Concept;
skos:notation "9";
skos:prefLabel "NIU (not in universe)";
skos:inScheme ddi:CitizenshipRepr.
</pre>
<p>Any universe of a variable definition is a subset of the universe of the entire study.
In our example, two questions are addressing the universe of persons, the third question
is addressing a specific subset of the universe of persons.</p>
<pre class="example">
ddi:UniversePerson a disco:Universe;
skos:definition "All persons."@en ;
skos:narrower ddi:Universe_1.
ddi:UniverseNonArgentines a disco:Universe;
skos:definition "Foreign-born persons who reside in Argentina."@en ;
skos:narrower ddi:Universe_1;
skos:narrower ddi:UniversePerson.
</pre>
<p>At the bottom of the screen showing the variable detail, we
can see that the variable for roofing material is associated
with a high-level concept, “Dwelling characteristics variables.”
(Figure 8.)</p>
<figure>
<img src="images/example-concept.png"/>
<figcaption>Concept-Variable Link</figcaption>
</figure>
<p>In Disco, DDI concepts can be hierarchically structured.</p>
<pre class="example">
ddi:IpumsCS a skos:ConceptScheme;
skos:hasTopConcept ddi:IpumsC1.
ddi:IpumsC1 a skos:Concept;
skos:prefLabel "Demographic Variables - PERSON"@en, "Variables démographiques - PERSONNE"@fr;
skos:inScheme ddi:IpumsCS.
ddi:IpumsC2 a skos:Concept;
skos:prefLabel "Nativity and Birthplace Variables -- PERSON"@en;
skos:inScheme ddi:IpumsCS.
</pre>
<p>The variable within a data file can be described using category statistics.
In the following example, absolute and relative frequencies of the variable categories are described.
This variable represents the sex of the respondent.
A variable is represented by a code list containing the code to which the category statistics resource points.</p>
<pre class="example">
ddi:CatStatistics_1 a disco:CategoryStatistics;
disco:frequency 1331444;
disco:percentage 49.97;
disco:statisticsCategory ddi:SexM;
disco:statisticsDataFile ddi:Datafile_1.
ddi:CatStatistics_2 a disco:CategoryStatistics;
disco:frequency 1336270;
disco:statisticsCategory ddi:SexF;
disco:statisticsDataFile ddi:Datafile_1.
</pre>
<p>Next we find some general information about the data files
produced by this study (Figure 9).</p>
<figure>
<img src="images/example-general-data-file-info.png"/>
<figcaption>General Data Set Information</figcaption>
</figure>
<p>Finally, the data file more concretely describes the actual physical file.</p>
<pre class="example">
ddi:Datafile_1 a disco:DataFile;
dcterms:identifier "ARG1900-P-H.dat";
dcterms:description "Person records"@en;
disco:caseQuantity 2667714;
dcterms:format "ascii";
dcterms:provenance "Minnesota Population Center"@en;
owl:versionInfo "Version 1.0, IPUMS sample"@en;
dcterms:spatial [
# This is the DC-strictly compatible way to do it
a dcterms:Location;
rdfs:label "Argentina, national coverage"@en
];
dcterms:temporal "PeriodOfTime"@en;
dcterms:subject "To be defined"@en.
</pre>
</section>
<section id='study'>
<h2>Studies and StudyGroups</h2>
<p>A simple <b><code><a href="#dfn-disco-study" class="internalDFN">Study</a></code></b> supports the stages of the full data lifecycle
in a modular manner. A <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> represents the
process by which a data set was generated or collected. Literal
properties include information about the funding, organizational
affiliation, abstract, title, version, and other such high-level
information. The key criteria for a study are:
a single conceptual model (e.g. survey research concept),
a single instrument (e.g. questionnaire) made up of one or more parts
(ex. employer survey, worker survey), and a single logical data structure
of the initial raw data (multiple data files can be created from this such
as a public use microdata file or aggregate data files).
In some cases, where data collection is cyclic or
on-going, data sets may be released as a <b><code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code></b>, where each
cycle or "wave" of the data collection activity produces one or
more data sets. This is typical for longitudinal studies, panel
studies, and other types of "series" (to use the DDI term). In
this case, a number of <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> objects would be collected into a
single <code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>.</p>
<p>
Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) may be contained in at most 1 <code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code> and groups of studies may include
0 to n studies.
Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) may have 0 to n <code>instrument</code> relationships to instruments (<code><a href="#dfn-disco-instrument" class="internalDFN">Instrument</a></code>).
Particular instruments (<code><a href="#dfn-disco-instrument" class="internalDFN">Instrument</a></code>), however, are connected with exactly 1 <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>.
Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) may have <code><a href="#dfn-disco-datafile" class="internalDFN">DataFile</a></code> connections with 0 to n data files (<code><a href="#dfn-disco-datafile" class="internalDFN">DataFile</a></code>) and data files (<code><a href="#dfn-disco-datafile" class="internalDFN">DataFile</a></code>) must have
1 to n <code><a href="#dfn-disco-datafile" class="internalDFN">DataFile</a></code> relationships to studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>).
Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) are associated with 0 to n variables (<code><a href="#dfn-disco-variable" class="internalDFN">Variable</a></code>) using the object property <code><a href="#dfn-disco-variable" class="internalDFN">Variable</a></code>.
On the other hand, variables (<code><a href="#dfn-disco-variable" class="internalDFN">Variable</a></code>) must be related to 1 to n studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>).
Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) may have 0 to n logical data sets (<code><a href="#dfn-disco-logicaldataset" class="internalDFN">LogicalDataSet</a></code>) (<code><a href="#dfn-disco-product" class="internalDFN">product</a></code>) and logical data sets (<code><a href="#dfn-disco-logicaldataset" class="internalDFN">LogicalDataSet</a></code>) must
have 1 to n <code><a href="#dfn-disco-product" class="internalDFN">product</a></code> relationships to studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>).
</p>
<section>
<h3>Coverage, References to DDI-XML Files, and Kind of Data</h3>
</section>
<figure>
<img class="uml-diagram" src="diagrams/study-metadata.png"/>
<figcaption>Coverage, References to DDI-XML Files, and Kind of Data</figcaption>
</figure>
<p>
Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) or groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>) (the <b>union of <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> and groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>)</b>)
may have different datatype properties.
Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) or groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>) may have an abstract (<code>dcterms:abstract</code>), a title
(<code>dcterms:title</code>), a subtitle (<code>subtitle</code>), an alternative title
(<code>dcterms:alternative</code>), a purpose (<code>purpose</code>), and information about
the date and the time since when the <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> is publicly available
(<code>dcterms:available</code>). Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) or groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>) may have multiple object properties.
The object properties <code>kindOfData</code> and
<code>dcterms:subject</code> guide to <code>skos:Concepts</code>.
<code>kindOfData</code> describes, with a string or a term from a controlled vocabulary,
the kind of data documented in the logical product(s) of a <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>.
Examples include survey data, census/enumeration data, administrative data, measurement data, assessment data, demographic data, voting data, etc.
Coverage describes the temporal, spatial and topical coverage of a study. <a href="https://stats.oecd.org/glossary/detail.asp?ID=464">Coverage</a> specifies the population from which observations for a particular topic can be drawn.
You can use <code>dcterms:subject</code> to describe the topical coverage of studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) and groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>).
The object property <code>ddiFile</code> points to <code>foaf:Documents</code> which are the DDI-XML files
containing further descriptions of the <code><a href="#dfn-disco-study" class="internalDFN">Study</a></code> or the <code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>.
Use <code>dcterms:temporal</code> for temporal coverages related to the union of studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) and groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>).
For the spatial coverage use <code>dcterms:spatial</code>.
The cardinalities of all the object properties are in both directions 0 to n.
The only exception is that studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) and groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>) may have 0 or 1
<code>kindOfData</code> relationships to <code>skos:Concepts</code>.
</p>
<section>
<h3>Relationships to Agents</h3>
</section>
<figure>
<img class="uml-diagram" src="diagrams/study-agent.png"/>
<figcaption>Relationships to Agents</figcaption>
</figure>
<p>
Creators (<code>dcterms:creator</code>), contributors (<code>dcterms:contributor</code>),
and publishers (<code>dcterms:publisher</code>) of Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) and groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>) are <code>foaf:Agents</code>
which are either <code>foaf:Persons</code> or <code>org:Organizations</code> whose members
are <code>foaf:Persons</code>. Studies (<code><a href="#dfn-disco-study" class="internalDFN">Study</a></code>) or groups of studies (<code><a href="#dfn-disco-studygroup" class="internalDFN">StudyGroup</a></code>) may be funded by
(<code>fundedBy</code>)
<code>foaf:Agents</code>. The object property <code>fundedBy</code> is defined as sub-property of
<code>dcterms:contributor</code>. The cardinalities of these object properties are in both
directions always 0 to n.