Skip to content

Commit

Permalink
feat: support divorces, add marriage information
Browse files Browse the repository at this point in the history
The existing `IS_MARRIED_TO` has been changed to `IS_SPOUSE_OF`.

`IS_SPOUSE_OF` is inferred from the family's `HUSB` / `WIFE` tags.

`IS_MARRIED_TO` relationships are now only created if the family has
marriage event information (`MARR` Gedcom tag).

`DIVORCED` relationships are created from family divorce event
information (`DIV` Gedcom tag).
  • Loading branch information
fbiville committed Sep 25, 2024
1 parent faf84d5 commit ac41346
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 70 deletions.
24 changes: 18 additions & 6 deletions src/main/java/com/neo4j/data/importer/GedcomImporter.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.neo4j.data.importer;

import com.joestelmach.natty.Parser;
import com.neo4j.data.importer.extractors.FamilyExtractors;
import com.neo4j.data.importer.extractors.PersonExtractors;
import java.io.File;
Expand Down Expand Up @@ -36,7 +37,8 @@ public Stream<Statistics> loadGedcom(@Name("file") String file) throws IOExcepti
var filePath = rebuildPath(file);
var model = loadModel(filePath);

var personExtractors = new PersonExtractors(model);
var dateParser = new Parser();
var personExtractors = new PersonExtractors(dateParser, model);
var statistics = new Statistics();
try (Transaction tx = db.beginTx()) {
model.getPeople().forEach(person -> {
Expand All @@ -47,20 +49,30 @@ public Stream<Statistics> loadGedcom(@Name("file") String file) throws IOExcepti
statistics.addNodesCreated(personsStats.getNodesCreated());
});

var familyExtractors = new FamilyExtractors();
var familyExtractors = new FamilyExtractors(dateParser);
model.getFamilies().forEach(family -> {
var attributes = familyExtractors.get().apply(family);
var stats = tx.execute(
"""
UNWIND $spouseIdPairs AS spousePair
MATCH (spouse1:Person {id: spousePair.id1}), (spouse2:Person {id: spousePair.id2})
CREATE (spouse1)-[:IS_MARRIED_TO]->(spouse2)
UNWIND $spouseIdPairs AS spouseInfo
MATCH (spouse1:Person {id: spouseInfo.id1}),
(spouse2:Person {id: spouseInfo.id2})
CREATE (spouse1)-[r:IS_SPOUSE_OF]->(spouse2)
FOREACH (marriageInfo IN spouseInfo.events["MARR"] |
CREATE (spouse1)-[r:IS_MARRIED_TO]->(spouse2)
SET r = marriageInfo
)
FOREACH (divorceInfo IN spouseInfo.events["DIV"] |
CREATE (spouse1)-[r:DIVORCED]->(spouse2)
SET r = divorceInfo
)
WITH spouse1, spouse2
UNWIND $childIds AS childId
MATCH (child:Person {id: childId})
CREATE (child)-[:IS_CHILD_OF]->(spouse1)
CREATE (child)-[:IS_CHILD_OF]->(spouse2)
""",
familyExtractors.get().apply(family))
attributes)
.getQueryStatistics();

statistics.addRelationshipsCreated(stats.getRelationshipsCreated());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import com.neo4j.data.importer.extractors.Lists.Pair;
import java.util.List;
import java.util.Map;
import org.folg.gedcom.model.Family;
import org.folg.gedcom.model.SpouseRef;

class DefaultFamilyExtractor implements FamilyExtractor {

private final Parser dateParser;

DefaultFamilyExtractor(Parser dateParser) {
this.dateParser = dateParser;
}

@Override
public List<Pair<String, String>> spouseReferences(Family family) {
List<String> spouseReferences1 =
Expand All @@ -16,6 +24,11 @@ public List<Pair<String, String>> spouseReferences(Family family) {
return Lists.crossProduct(spouseReferences1, spouseReferences2);
}

@Override
public Map<String, List<Map<String, Object>>> familyEvents(Family family) {
return EventFacts.extract(family.getEventsFacts(), dateParser);
}

@Override
public List<String> childReferences(Family family) {
return family.getChildRefs().stream().map(SpouseRef::getRef).toList();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -19,8 +16,8 @@ class DefaultPersonExtractor implements PersonExtractor {

private final Parser dateParser;

public DefaultPersonExtractor() {
this.dateParser = new Parser();
public DefaultPersonExtractor(Parser dateParser) {
this.dateParser = dateParser;
}

@Override
Expand Down Expand Up @@ -50,42 +47,7 @@ public Optional<String> gender(Person person) {

@Override
public Map<String, Object> facts(Person person) {
Map<String, Object> attributes = new HashMap<>();
person.getEventsFacts().forEach(eventFact -> {
String factName = eventFact.getDisplayType().toLowerCase(Locale.ROOT);
String date = eventFact.getDate();
if (date != null) {
attributes.put(String.format("raw_%s_date", factName), date);
var localDate = parseLocalDate(date);
if (localDate != null) {
attributes.put(String.format("%s_date", factName), localDate);
}
}

String place = eventFact.getPlace();
if (place != null) {
attributes.put(factName + "_" + "location", place);
}
});
return attributes;
}

private LocalDate parseLocalDate(String date) {
var parse = dateParser.parse(date);
if (parse.size() != 1) {
return null;
}

var dateGroup = parse.get(0);
if (dateGroup.getDates().size() != 1 || dateGroup.isDateInferred()) {
// Dates should be parsed explicitly from input.
// Inferred dates are likely to be set using current time and therefore incorrect.
return null;
}

var parsedDate = dateGroup.getDates().get(0);

return LocalDate.ofInstant(parsedDate.toInstant(), ZoneId.systemDefault());
return EventFacts.extractFlat(person.getEventsFacts(), dateParser);
}

private static List<String> extractNames(Person person, Function<Name, String> nameFn) {
Expand Down
88 changes: 88 additions & 0 deletions src/main/java/com/neo4j/data/importer/extractors/EventFacts.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import org.folg.gedcom.model.EventFact;

/**
 * Static helpers that turn Gedcom {@link EventFact}s into plain attribute maps.
 *
 * <p>For every fact, up to four attributes are produced: the event type, the raw date string, the
 * parsed {@link LocalDate} (only when the raw date could be parsed unambiguously) and the
 * location. Attributes whose source value is {@code null} are simply left out.
 */
final class EventFacts {

    /**
     * Upper bound of the number of attributes produced for a single fact: type, raw date, parsed
     * date and location.
     */
    private static final int MAX_ATTRIBUTES_PER_FACT = 4;

    private EventFacts() {
        // static utility class, not meant to be instantiated
    }

    /**
     * Extracts the type, date and location of all the given facts into a single, "flat" map.
     *
     * <p>Every key is qualified with the lower-cased display type of its fact, followed by an
     * underscore: {@code <displaytype>_type}, {@code raw_<displaytype>_date},
     * {@code <displaytype>_date} and {@code <displaytype>_location}.
     *
     * <p>Facts sharing the same display type write to the same keys: attributes of a later fact
     * overwrite the ones of an earlier fact, key by key.
     *
     * @param facts the facts to extract attributes from
     * @param dateParser the parser used to turn raw date strings into {@link LocalDate}s
     * @return a new mutable map holding the attributes of all facts
     */
    public static Map<String, Object> extractFlat(List<EventFact> facts, Parser dateParser) {
        var attributes = new HashMap<String, Object>();
        for (EventFact fact : facts) {
            attributes.putAll(extractFact(fact, dateParser, EventFacts::displayTypeQualifier));
        }
        return attributes;
    }

    /**
     * Extracts the type, date and location of all the given facts, categorized by event tag.
     *
     * <p>The resulting map is keyed by the upper-cased tag of the facts. Each fact contributes one
     * entry to the list of its tag, in encounter order, with the unqualified keys {@code type},
     * {@code raw_date}, {@code date} and {@code location}.
     *
     * @param facts the facts to extract attributes from
     * @param dateParser the parser used to turn raw date strings into {@link LocalDate}s
     * @return a new mutable map from upper-cased tag to the attributes of each fact with that tag
     */
    public static Map<String, List<Map<String, Object>>> extract(List<EventFact> facts, Parser dateParser) {
        var eventsByTag = new HashMap<String, List<Map<String, Object>>>();
        for (EventFact fact : facts) {
            String tag = fact.getTag().toUpperCase(Locale.ROOT);
            eventsByTag.computeIfAbsent(tag, key -> new ArrayList<>()).add(extractFact(fact, dateParser));
        }
        return eventsByTag;
    }

    /** Computes the key qualifier used by {@link #extractFlat}: lower-cased display type plus "_". */
    private static String displayTypeQualifier(EventFact fact) {
        return String.format("%s_", fact.getDisplayType().toLowerCase(Locale.ROOT));
    }

    /** Extracts the attributes of a single fact, using unqualified keys. */
    private static Map<String, Object> extractFact(EventFact eventFact, Parser dateParser) {
        return extractFact(eventFact, dateParser, fact -> "");
    }

    /**
     * Extracts the attributes of a single fact.
     *
     * @param fact the fact to read the type, date and place from
     * @param dateParser the parser used to turn the raw date string into a {@link LocalDate}
     * @param keyQualifierFn computes the qualifier inserted into every key of this fact
     * @return a new mutable map holding between zero and four attributes
     */
    private static Map<String, Object> extractFact(
            EventFact fact, Parser dateParser, Function<EventFact, String> keyQualifierFn) {
        // Sized so that MAX_ATTRIBUTES_PER_FACT entries fit without rehashing
        // at the default load factor of 0.75.
        var attributes = new HashMap<String, Object>(2 * MAX_ATTRIBUTES_PER_FACT);
        String keyQualifier = keyQualifierFn.apply(fact);

        String type = fact.getType();
        if (type != null) {
            attributes.put(String.format("%stype", keyQualifier), type);
        }

        String date = fact.getDate();
        if (date != null) {
            // The raw value is always kept, since parsing may fail or be ambiguous.
            attributes.put(String.format("raw_%sdate", keyQualifier), date);
            var localDate = parseLocalDate(dateParser, date);
            if (localDate != null) {
                attributes.put(String.format("%sdate", keyQualifier), localDate);
            }
        }

        String place = fact.getPlace();
        if (place != null) {
            attributes.put(String.format("%slocation", keyQualifier), place);
        }
        return attributes;
    }

    /**
     * Parses the given raw date string into a {@link LocalDate}, in the system default time zone.
     *
     * @return the parsed date, or {@code null} when the parser does not yield exactly one date
     *     group holding exactly one explicitly specified date
     */
    private static LocalDate parseLocalDate(Parser dateParser, String date) {
        var dateGroups = dateParser.parse(date);
        if (dateGroups.size() != 1) {
            return null;
        }

        var dateGroup = dateGroups.get(0);
        if (dateGroup.getDates().size() != 1 || dateGroup.isDateInferred()) {
            // Dates should be parsed explicitly from input.
            // Inferred dates are likely to be set using current time and therefore incorrect.
            return null;
        }

        var parsedDate = dateGroup.getDates().get(0);
        return LocalDate.ofInstant(parsedDate.toInstant(), ZoneId.systemDefault());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@ public interface FamilyExtractor extends AttributeExtractor<Family> {

List<Pair<String, String>> spouseReferences(Family family);

Map<String, List<Map<String, Object>>> familyEvents(Family family);

List<String> childReferences(Family family);

default Map<String, Object> apply(Family family) {
var spouseIds = spouseReferences(family).stream()
var familyEvents = familyEvents(family);
var spouseInfo = spouseReferences(family).stream()
.map(couple -> Map.of(
"id1", couple.left(),
"id2", couple.right()))
"id2", couple.right(),
"events", familyEvents))
.toList();
return Map.of("spouseIdPairs", spouseIds, "childIds", childReferences(family));
return Map.of("spouseIdPairs", spouseInfo, "childIds", childReferences(family));
}
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.util.function.Supplier;
import org.folg.gedcom.model.Family;

public class FamilyExtractors implements Supplier<AttributeExtractor<Family>> {

private final Parser dateParser;

public FamilyExtractors(Parser dateParser) {
this.dateParser = dateParser;
}

@Override
public AttributeExtractor<Family> get() {
return new DefaultFamilyExtractor();
return new DefaultFamilyExtractor(dateParser);
}
}
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.util.Locale;
import java.util.function.Supplier;
import org.folg.gedcom.model.Gedcom;
import org.folg.gedcom.model.Person;

public class PersonExtractors implements Supplier<AttributeExtractor<Person>> {

private final Parser dateParser;
private final String generatorName;

public PersonExtractors(Gedcom model) {
public PersonExtractors(Parser dateParser, Gedcom model) {
this.dateParser = dateParser;
this.generatorName = model.getHeader().getGenerator().getName().toLowerCase(Locale.ROOT);
}

@Override
public AttributeExtractor<Person> get() {
var defaultExtractor = new DefaultPersonExtractor();
var defaultExtractor = new DefaultPersonExtractor(dateParser);
if ("heredis pc".equals(generatorName)) {
return new HeredisPersonExtractor(defaultExtractor);
}
Expand Down
Loading

0 comments on commit ac41346

Please sign in to comment.