Skip to content

Commit

Permalink
Adds the -insitu flag
Browse files Browse the repository at this point in the history
With this flag, the extractor generates a finding aid (but no IUPAC FAIRSpec
Collection) within the *originating* source directory.

Safeguards need to be put in place, but it does work, and the Finding
Aid maintains a significant number of representations in byte[] form.
  • Loading branch information
BobHanson committed Dec 9, 2024
1 parent 661e007 commit 1f4e539
Show file tree
Hide file tree
Showing 24 changed files with 314 additions and 216 deletions.
Binary file modified dist/IFDExtractor.jar
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ public DOICrawler(String... args) {
*/
@Override
public void addDeferredPropertyOrRepresentation(String key, Object val,
boolean isInLine, String mediaType, String note) {
boolean isInLine, String mediaType, String note, String src) {
// TODO Auto-generated method stub

}
Expand Down Expand Up @@ -698,7 +698,7 @@ protected void createFindingAid() {
processRecords(null, doiList);
String aid = faHelper.generateFindingAid(targetPath);
if (aid != null && createLandingPage) {
buildSite();
buildSite(targetPath);
}

} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -892,9 +892,9 @@ public void setLocalSourceFileName(String name) {
public void setLists(String rootPath, String ignore, String accept) {
if (lstManifest != null)
return;
lstManifest = new FileList(rootPath, "manifest");
lstIgnored = new FileList(rootPath, "ignored");
lstAccepted = new FileList(rootPath, "accepted");
lstManifest = new FileList(rootPath, "manifest", null);
lstIgnored = new FileList(rootPath, "ignored", null);
lstAccepted = new FileList(rootPath, "accepted", null);
if (ignore != null)
lstIgnored.setAcceptPattern(ignore);
if (accept != null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ public abstract class FindingAidCreator implements MetadataReceiverI {
*/
protected boolean addPublicationMetadata = false;

/**
* Setting insitu to true generates an entirely self-contained finding aid in
* the origin directory. The finding aid involves only origin files: no local
* files are created and nothing is rezipped. The target directory contains
* only ancillary files.
*/
protected boolean insitu = false;

/**
* set true to zip up the extracted collection, placing that in the target
* directory
Expand Down Expand Up @@ -159,7 +166,7 @@ protected void setDerivedFlags() {
// this next is independent of readOnly
// false to bypass final creation of an
// _IFD_collection.zip file
createZippedCollection = createZippedCollection && !debugReadOnly;
createZippedCollection = createZippedCollection && !insitu && !debugReadOnly;

readOnly |= debugReadOnly; // for testing; when true, no output other than a log file is produced
noOutput = (createFindingAidOnly || readOnly);
Expand Down Expand Up @@ -248,6 +255,10 @@ protected String checkFlags(String flags) {
createZippedCollection = false;
}

if (flags.indexOf("-insitu;") >= 0) {
insitu = true;
}

if (flags.indexOf("-readonly;") >= 0) {
readOnly = true;
}
Expand Down Expand Up @@ -417,9 +428,9 @@ public IFDFindingAid getFindingAid() {
return getHelper().getFindingAid();
}

protected void buildSite() {
protected void buildSite(File htmlPath) {
try {
PageCreator.buildSite(targetPath, true, launchLandingPage);
PageCreator.buildSite(htmlPath, true, launchLandingPage);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
Expand Down
23 changes: 14 additions & 9 deletions src/main/java/com/integratedgraphics/extractor/IFDExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,12 @@ protected static String getCommandLineHelp() {
+ "\n-requirePubInfo (throw an error is datacite cannot be reached; post-publication-related collections only)";
}

public void runExtraction(String ifdExtractFile, String localSourceArchive, String targetDir) {
runExtraction(new String[] { ifdExtractFile, localSourceArchive, targetDir });
public IFDExtractor() {
initializeExtractor();
}

public void runExtraction(String ifdExtractFile, String localSourceArchive, String targetDir, String flags) {
runExtraction(new String[] { ifdExtractFile, localSourceArchive, targetDir, flags });
}

public void runExtraction(String[] args) {
Expand Down Expand Up @@ -166,7 +170,6 @@ public void runExtraction(String[] args) {
throw new NullPointerException("No IFD-extract.json or test set?");
if (targetDir == null)
targetDir = "site";
new File(targetDir).mkdirs();
FAIRSpecUtilities.setLogging(targetDir + "/extractor.log");
int failed = 0;
logToSys("Extractor.runExtraction output to " + new File(targetDir).getAbsolutePath());
Expand Down Expand Up @@ -212,10 +215,6 @@ public String processFlags(String[] args, String moreFlags) {
return flags;
}

public IFDExtractor() {
initializeExtractor();
}

public String dumpFlags() {
String s = " stopOnAnyFailure = " + stopOnAnyFailure //
+ "\n debugging = " + debugging //
Expand Down Expand Up @@ -311,12 +310,18 @@ public void run(File ifdExtractScriptFile, File targetPath, String localsourceAr
throws IOException, IFDException {
log("!Extractor\n ifdExtractScriptFile= " + ifdExtractScriptFile + "\n localsourceArchive = "
+ localsourceArchive + "\n targetDir = " + targetPath.getAbsolutePath());
if (extractAndCreateFindingAid(ifdExtractScriptFile, localsourceArchive, targetPath) == null) {

File htmlPath = (insitu ? new File(localsourceArchive) : targetPath);

String serializedFA = extractAndCreateFindingAid(ifdExtractScriptFile, localsourceArchive, targetPath);
if (serializedFA == null) {
if (!allowNoPubInfo) {
throw new IFDException("Extractor failed");
}
} else if (createLandingPage) {
buildSite();
if (insitu)
FAIRSpecUtilities.writeBytesToFile(serializedFA.getBytes(), new File(htmlPath, "IFD.findingaid.json"));
buildSite(htmlPath);
}

log("!Extractor extracted " + lstManifest.size() + " files (" + lstManifest.getByteCount() + " bytes)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ abstract class IFDExtractorLayer0 extends FindingAidCreator implements FAIRSpecE
protected String stopAfter;

protected void checkStopAfter(String what) {
System.out.println(what
+ " " + helper.getCompoundCollection().size()
+ " " + helper.getStructureCollection().size()
+ " " + helper.getSpecCollection().size()
);
boolean stopping = what.equals(stopAfter);
if (stopping) {
System.out.println("stopping after " + what);
Expand Down Expand Up @@ -143,7 +148,7 @@ protected void checkStopAfter(String what) {
/**
* list of files to always accept, specified in an extractor JSON template
*/
protected FileList lstAccepted = new FileList(null, "accepted");
protected FileList lstAccepted = new FileList(null, "accepted", null);

/**
* list of files ignored -- probably MACOSX trash or Google desktop.ini trash
Expand All @@ -158,14 +163,14 @@ protected void checkStopAfter(String what) {
/**
* list of files rejected -- probably MACOSX trash or Google desktop.ini trash
*/
protected final FileList lstRejected = new FileList(null, "rejected");
protected final FileList lstRejected = new FileList(null, "rejected", ".");

/**
* Track the files written to the collection, even if there is no output. This
* allows for removing ZIP files from the finding aid and manifest if they are
* not actually written.
*/
protected FileList lstWritten = new FileList(null, "written");
protected FileList lstWritten = new FileList(null, "written", null);

/**
* objects found in IFD-extract.json
Expand Down Expand Up @@ -214,7 +219,7 @@ protected void checkStopAfter(String what) {
public void addProperty(String key, Object val) {
if (val != IFDProperty.NULL)
log(this.localizedName + " addProperty " + key + "=" + val);
addDeferredPropertyOrRepresentation(key, val, false, null, null);
addDeferredPropertyOrRepresentation(key, val, false, null, null, "L0 vndaddprop");
}

/**
Expand All @@ -230,21 +235,21 @@ public void addProperty(String key, Object val) {
* extraction
* @param val either a String value or an Object[] with elements byte[]
* and String name
* @param mediaType a media type for a representation, or null
* @param isInline representation data is being provided as inline-data, to be
* saved only in the finding aid (InChI, SMILES, InChIKey)
* @param mediaType a media type for a representation, or null
*/
@Override
public void addDeferredPropertyOrRepresentation(String key, Object val, boolean isInline, String mediaType,
String note) {
String note, String src) {
// System.out.println("!!!" + key + " ln=" + localizedName + " op=" +
// originPath);
if (key == null) {
deferredPropertyList.add(null);
return;
}
deferredPropertyList
.add(new Object[] { originPath, localizedName, key, val, Boolean.valueOf(isInline), mediaType, note });
.add(new Object[] { originPath, localizedName, key, val, Boolean.valueOf(isInline), mediaType, note, src });
if (key.startsWith(DefaultStructureHelper.STRUC_FILE_DATA_KEY)) {
// Phase 2a has identified a structure before a compound has been established in Phase 2b.
// Mestrelab vendor plug-in has found a MOL or SDF file in Phase 2b.
Expand Down Expand Up @@ -461,6 +466,8 @@ private void writeDigitalItem(String originPath, ArchiveInputStream ais, long le
lstManifest.add(localizedName, len);
break;
}
if (insitu)
return;
File f = getAbsoluteFileTarget(localizedName);
FAIRSpecUtilities.getLimitedStreamBytes(ais, len, new FileOutputStream(f), false, true);
}
Expand Down
Loading

0 comments on commit 1f4e539

Please sign in to comment.