Skip to content

Commit

Permalink
Merge pull request #277 from INCATools/query-labels
Browse files Browse the repository at this point in the history
Add ability to query annotation value patterns
  • Loading branch information
balhoff authored Dec 3, 2020
2 parents 2fe0925 + e6f164d commit 183f8ca
Show file tree
Hide file tree
Showing 10 changed files with 276 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class DOSDPEntityChecker(dosdp: DOSDP, prefixes: PartialFunction[String, String]
private val factory = OWLManager.getOWLDataFactory

def getOWLAnnotationProperty(name: String): OWLAnnotationProperty = {
val properties = dosdp.annotationProperties.getOrElse(Map.empty)
val properties = dosdp.annotationProperties.getOrElse(Map.empty) ++
dosdp.readable_identifiers.toList.flatten.map(id => id -> id).toMap
nameToIRI(name, properties).map(factory.getOWLAnnotationProperty).orNull
}

Expand Down
79 changes: 67 additions & 12 deletions src/main/scala/org/monarchinitiative/dosdp/SPARQL.scala
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package org.monarchinitiative.dosdp

import java.util.UUID
import java.util.regex.Pattern

import org.apache.jena.query.ParameterizedSparqlString
import org.monarchinitiative.dosdp.cli.Config.{AxiomKind, LogicalAxioms}
import org.monarchinitiative.dosdp.cli.Generate
import org.phenoscape.owlet.OwletManchesterSyntaxDataType.SerializableClassExpression
import org.semanticweb.owlapi.apibinding.OWLManager
import org.semanticweb.owlapi.model._
Expand All @@ -11,23 +14,28 @@ import scala.jdk.CollectionConverters._

object SPARQL {

def queryFor(dosdp: ExpandedDOSDP): String = {
private val factory = OWLManager.getOWLDataFactory()

def queryFor(dosdp: ExpandedDOSDP, axioms: AxiomKind): String = {
s"""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ${selectFor(dosdp)}
SELECT DISTINCT ${selectFor(dosdp, axioms)}
WHERE {
${triplesFor(dosdp).mkString("\n")}
${triplesFor(dosdp, axioms).mkString("\n")}
}
ORDER BY ?defined_class_label
"""
}

def selectFor(dosdp: ExpandedDOSDP): String = {
def selectFor(dosdp: ExpandedDOSDP, axioms: AxiomKind): String = {
val axVariables = axiomVariables(dosdp)
val variables = axVariables ++ axVariables.map(v => s"(STR(${v}__label) AS ${v}_label)")
val variables = axVariables ++ axVariables.map(v => s"(STR(${v}__label) AS ${v}_label)") ++
(if (axioms != LogicalAxioms) axVariables.filterNot(_.startsWith(s"?${DOSDP.DefinedClassVariable}"))
.map(v => s"(STR(${v}__match_property) AS ${v}_match_property)") else Set.empty[String])
if (variables.isEmpty) "*" else variables.toSeq
.sortBy(_.replaceFirst("\\(STR\\(", "").replaceFirst(DOSDP.DefinedClassVariable, "0"))
.mkString(" ")
Expand All @@ -44,8 +52,18 @@ ORDER BY ?defined_class_label

private val Thing = OWLManager.getOWLDataFactory.getOWLThing

def triplesFor(dosdp: ExpandedDOSDP): Seq[String] = {
val axiomTriples = dosdp.filledLogicalAxioms(None, None).toSeq.flatMap(triples)
def triplesFor(dosdp: ExpandedDOSDP, axioms: AxiomKind): Seq[String] = {
val props = dosdp.readableIdentifierProperties.to(Set)
val logicalBindings = dosdp.dosdp.vars.map { vars =>
vars.keys.map { key =>
key -> SingleValue(DOSDP.variableToIRI(key).toString)
}.toMap
}
val (queryLogical, queryAnnotations) = Generate.axiomsOutputChoice(axioms)
val annotationTriples = if (queryAnnotations) dosdp.filledAnnotationAxioms(logicalBindings, None).toSeq.flatMap(ax => triples(ax, props))
else Nil
val axiomTriples = if (queryLogical) dosdp.filledLogicalAxioms(None, None).toSeq.flatMap(ax => triples(ax, props))
else Nil
val variableTriples = dosdp.varExpressions.toSeq.flatMap {
case (_, Thing) => Seq.empty // relationships to owl:Thing are not typically explicit in the ontology
case (variable, named: OWLClass) => Seq(s"?${DOSDP.processedVariable(variable)} rdfs:subClassOf* <${named.getIRI}> .")
Expand All @@ -56,15 +74,15 @@ ORDER BY ?defined_class_label
Seq(s"?${DOSDP.processedVariable(variable)} rdfs:subClassOf $sanitizedExpression .")
}
val labelTriples = axiomVariables(dosdp).map(v => s"OPTIONAL { $v rdfs:label ${v}__label . }")
axiomTriples ++ variableTriples ++ labelTriples
annotationTriples ++ axiomTriples ++ variableTriples ++ labelTriples
}

def triples(axiom: OWLAxiom): Seq[String] = axiom match {
case subClassOf: OWLSubClassOfAxiom =>
def triples(axiom: OWLAxiom, readableIdentifierProperties: Set[OWLAnnotationProperty]): Seq[String] = axiom match {
case subClassOf: OWLSubClassOfAxiom =>
val (subClass, subClassTriples) = triples(subClassOf.getSubClass)
val (superClass, superClassTriples) = triples(subClassOf.getSuperClass)
Seq(s"$subClass rdfs:subClassOf $superClass .") ++ subClassTriples ++ superClassTriples
case equivalentTo: OWLEquivalentClassesAxiom =>
case equivalentTo: OWLEquivalentClassesAxiom =>
if (!equivalentTo.containsNamedEquivalentClass || (equivalentTo.getClassExpressions.size > 2)) scribe.warn("More than two operands or missing named class in equivalent class axiom unexpected")
(for {
named <- equivalentTo.getNamedClasses.asScala.headOption
Expand All @@ -74,7 +92,7 @@ ORDER BY ?defined_class_label
val (equivClass, equivClassTriples) = triples(expression)
Seq(s"$namedClass owl:equivalentClass $equivClass .") ++ namedClassTriples ++ equivClassTriples
}).toSeq.flatten
case disjointWith: OWLDisjointClassesAxiom =>
case disjointWith: OWLDisjointClassesAxiom =>
if (!disjointWith.getClassExpressions.asScala.forall(_.isAnonymous) || (disjointWith.getClassExpressions.size > 2)) scribe.warn("More than two operands or missing named class in equivalent class axiom unexpected")
(for {
named <- disjointWith.getClassExpressions.asScala.find(!_.isAnonymous)
Expand All @@ -84,6 +102,43 @@ ORDER BY ?defined_class_label
val (equivClass, equivClassTriples) = triples(expression)
Seq(s"$namedClass owl:disjointWith $equivClass .") ++ namedClassTriples ++ equivClassTriples
}).toSeq.flatten
case annotationAssertion: OWLAnnotationAssertionAxiom =>
val (subject, subjecTriples) = triples(factory.getOWLClass(annotationAssertion.getSubject.asInstanceOf[IRI]))
val property = s"<${annotationAssertion.getProperty.getIRI}>"
val (value, valueTriples) = triples(annotationAssertion.getValue, readableIdentifierProperties)
Seq(s"$subject $property $value .") ++ subjecTriples ++ valueTriples
}

private val DOSDPVariableIRIMatch = s"\\b${DOSDP.variablePrefix}(\\S+)\\b".r

/**
 * Renders `text` through Jena's `ParameterizedSparqlString.appendLiteral` and
 * returns the resulting string, so it can be embedded in generated SPARQL.
 */
private def escape(text: String): String = {
  val literalBuilder = new ParameterizedSparqlString()
  literalBuilder.appendLiteral(text)
  literalBuilder.toString
}

/**
 * Translates an annotation value into a SPARQL node plus the extra patterns
 * needed to constrain it.
 *
 * - An IRI that matches `DOSDPVariable` becomes the query variable `?name`
 *   together with an `isIRI` filter; any other IRI is emitted as `<iri>`.
 * - A literal becomes a fresh variable (from `genVar`) filtered by a regex
 *   built from the literal text: the static text between DOSDP variable
 *   IRIs is quoted and each variable position becomes a `(.+)` group. For
 *   each variable found, the captured text is bound to another fresh
 *   variable and matched against the variable through one of the
 *   readable-identifier properties.
 *
 * @param annotationValue              value of the annotation assertion being translated
 * @param readableIdentifierProperties properties whose values may appear embedded in literal text
 * @return the SPARQL node standing for the value, and the patterns that constrain it
 */
def triples(annotationValue: OWLAnnotationValue, readableIdentifierProperties: Set[OWLAnnotationProperty]): (String, Seq[String]) = annotationValue match {
  case iri: IRI =>
    iri.toString match {
      case DOSDPVariable(variable) => (s"?$variable", List(s"FILTER(isIRI(?$variable))"))
      case _                       => (s"<$iri>", Nil)
    }
  case literal: OWLLiteral =>
    // genVar is called once for the value node first, then once per variable below;
    // keep this order so generated variable names line up with the original output.
    val node = genVar
    val text = literal.getLiteral
    // split with limit -1 keeps trailing empty segments, so a variable at the very
    // end of the text still yields a capture group.
    val valueRegex = DOSDPVariableIRIMatch.pattern.split(text, -1).map(Pattern.quote).mkString("(.+)")
    // The escaped regex is identical for the FILTER and for every BIND; compute it once.
    // NOTE(review): the regex carries no ^/$ anchors — confirm substring matches are intended.
    val escapedRegex = escape(valueRegex)
    val variableNames = DOSDPVariableIRIMatch.findAllMatchIn(text).toList.map(_.group(1))
    val predicates = readableIdentifierProperties.map(p => s"<${p.getIRI}>").mkString(" ")
    val varPatterns = variableNames.zipWithIndex.flatMap { case (variableName, index) =>
      val predicateVar = s"${variableName}__match_property"
      val variableMatchLabel = genVar
      List(
        // "$$" is a literal dollar sign: the replacement is the capture-group reference $1, $2, ...
        s"""BIND((REPLACE($node, $escapedRegex, "$$${index + 1}")) AS $variableMatchLabel)""",
        s"VALUES ?$predicateVar { $predicates }",
        s"?$variableName ?$predicateVar $variableMatchLabel ."
      )
    }
    (node, s"FILTER(REGEX($node, $escapedRegex))" :: varPatterns)
}

def triples(expression: OWLClassExpression): (String, Seq[String]) = expression match {
Expand Down
29 changes: 26 additions & 3 deletions src/main/scala/org/monarchinitiative/dosdp/cli/Config.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import caseapp.core.argparser.{ArgParser, SimpleArgParser}
import com.github.tototoshi.csv.{CSVFormat, DefaultCSVFormat, TSVFormat}
import io.circe.generic.auto._
import io.circe.yaml.parser
import org.monarchinitiative.dosdp.cli.Config.{BoolValue, FalseValue, MultiArgList, TrueValue, inputDOSDPFrom}
import org.monarchinitiative.dosdp.cli.Config.{AllAxioms, AxiomKind, BoolValue, FalseValue, LogicalAxioms, MultiArgList, inputDOSDPFrom}
import org.monarchinitiative.dosdp.{DOSDP, OBOPrefixes, Utilities}
import org.semanticweb.owlapi.model.OWLOntology
import zio._
Expand Down Expand Up @@ -98,7 +98,7 @@ final case class GenerateConfig(@Recurse
infile: String = "fillers.tsv",
@HelpMessage("Restrict generated axioms to 'logical', 'annotation', or 'all' (default)")
@ValueDescription("all|logical|annotation")
restrictAxiomsTo: String = "all",
restrictAxiomsTo: AxiomKind = AllAxioms,
@HelpMessage("Data column containing local axiom output restrictions")
@ValueDescription("name")
restrictAxiomsColumn: Option[String],
Expand Down Expand Up @@ -135,7 +135,11 @@ final case class QueryConfig(@Recurse
reasoner: Option[String],
@HelpMessage("Print generated query without running against ontology")
@ValueDescription("true|false")
printQuery: BoolValue = FalseValue) extends Config {
printQuery: BoolValue = FalseValue,
@HelpMessage("Restrict queried axioms to 'logical', 'annotation', or 'all' (default)")
@ValueDescription("all|logical|annotation")
restrictAxiomsTo: AxiomKind = LogicalAxioms
) extends Config {

override def run: ZIO[zio.ZEnv, DOSDPError, Unit] = Query.run(this)

Expand Down Expand Up @@ -199,6 +203,25 @@ object Config {
else Right(MultiArgList(arg.split(" ", -1).toList))
}

/** Which category of axioms a command should work with; parsed from "all", "logical" or "annotation". */
sealed trait AxiomKind

/** Selected by the value "logical". */
case object LogicalAxioms extends AxiomKind

/** Selected by the value "annotation". */
case object AnnotationAxioms extends AxiomKind

/** Selected by the value "all". */
case object AllAxioms extends AxiomKind

/** Argument parser that lets [[AxiomKind]] be used as an option type; delegates to `parseAxiomKind`. */
implicit val axiomKindArgParser: ArgParser[AxiomKind] = SimpleArgParser.from[AxiomKind]("axiom kind")(parseAxiomKind)

/**
 * Parses a textual axiom restriction into an [[AxiomKind]].
 *
 * Matching is case-insensitive. Lower-casing uses `Locale.ROOT` so the result
 * does not depend on the JVM default locale (under a Turkish locale, for
 * example, "LOGICAL" would otherwise lower-case to "logıcal" and be rejected).
 *
 * @param arg the raw value: "all", "logical" or "annotation" in any letter case
 * @return the matching kind, or a `MalformedValue` carrying the rejected input
 */
def parseAxiomKind(arg: String): Either[MalformedValue, AxiomKind] =
  arg.toLowerCase(java.util.Locale.ROOT) match {
    case "all"        => Right(AllAxioms)
    case "logical"    => Right(LogicalAxioms)
    case "annotation" => Right(AnnotationAxioms)
    case _            => Left(MalformedValue("Not a valid axiom type", arg))
  }

}

final case class DOSDPError(msg: String, cause: Throwable) extends Exception(msg, cause)
Expand Down
27 changes: 11 additions & 16 deletions src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import java.io.{File, StringReader}
import cats.implicits._
import com.github.tototoshi.csv.{CSVFormat, CSVReader}
import org.monarchinitiative.dosdp.Utilities.isDirectory
import org.monarchinitiative.dosdp.cli.Config.{AllAxioms, AnnotationAxioms, AxiomKind, LogicalAxioms}
import org.monarchinitiative.dosdp.{AxiomType => _, _}
import org.phenoscape.scowl._
import org.semanticweb.owlapi.model._
Expand All @@ -21,10 +22,9 @@ object Generate {

def run(config: GenerateConfig): ZIO[ZEnv, DOSDPError, Unit] =
for {
axiomsOutput <- ZIO.fromEither(axiomsOutputChoice(config))
(outputLogicalAxioms, outputAnnotationAxioms) = axiomsOutput
ontologyOpt <- config.common.ontologyOpt
prefixes <- config.common.prefixesMap
(outputLogicalAxioms, outputAnnotationAxioms) = axiomsOutputChoice(config.restrictAxiomsTo)
sepFormat <- ZIO.fromEither(Config.tabularFormat(config.common.tableFormat))
axiomSourceProperty <- ZIO.fromOption(Prefixes.idToIRI(config.axiomSourceAnnotationProperty, prefixes).map(AnnotationProperty(_)))
.orElseFail(DOSDPError("Couldn't create IRI for axiom source annotation property."))
Expand Down Expand Up @@ -98,13 +98,10 @@ object Generate {
dataListBindings +
iriBinding
annotationBindings = eDOSDP.substitutions.foldLeft(initialAnnotationBindings)((bindings, sub) => sub.expandBindings(bindings)) ++ additionalBindings
localOutputLogicalAxiomsWithLocalOutputAnnotationAxioms <- restrictAxiomsColumnName.flatMap(column => row.get(column)).map(_.trim).map {
case "all" => Right((true, true))
case "logical" => Right((true, false))
case "annotation" => Right((false, true))
case "" => Right((outputLogicalAxioms, outputAnnotationAxioms))
case other => Left(DOSDPError(s"Invalid value for restrict-axioms-column: $other"))
}.getOrElse(Right((outputLogicalAxioms, outputAnnotationAxioms)))
localOutputLogicalAxiomsWithLocalOutputAnnotationAxioms <- restrictAxiomsColumnName.flatMap(column => row.get(column).flatMap(value => stripToOption(value)))
.map(Config.parseAxiomKind)
.map(maybeAxiomKind => maybeAxiomKind.map(axiomsOutputChoice))
.getOrElse(Right((outputLogicalAxioms, outputAnnotationAxioms))).leftMap(e => DOSDPError(s"Malformed value in table restrict-axioms-column: ${e.error}"))
(localOutputLogicalAxioms, localOutputAnnotationAxioms) = localOutputLogicalAxiomsWithLocalOutputAnnotationAxioms
logicalAxioms = if (localOutputLogicalAxioms) eDOSDP.filledLogicalAxioms(Some(logicalBindings), Some(annotationBindings)) else Set.empty
annotationAxioms = if (localOutputAnnotationAxioms) eDOSDP.filledAnnotationAxioms(Some(annotationBindings), Some(logicalBindings)) else Set.empty
Expand Down Expand Up @@ -160,13 +157,11 @@ object Generate {
}
} yield columns -> data

private def axiomsOutputChoice(config: GenerateConfig): Either[DOSDPError, (Boolean, Boolean)] =
config.restrictAxiomsTo match {
case "all" => Right((true, true))
case "logical" => Right((true, false))
case "annotation" => Right((false, true))
case other => Left(DOSDPError(s"Invalid argument for restrict-axioms-to: $other"))
}
/**
 * Converts an [[AxiomKind]] into a pair of flags.
 *
 * @param kind the requested axiom restriction
 * @return `(includeLogical, includeAnnotations)`
 */
def axiomsOutputChoice(kind: AxiomKind): (Boolean, Boolean) = {
  val includeLogical = kind match {
    case AllAxioms | LogicalAxioms => true
    case AnnotationAxioms          => false
  }
  val includeAnnotations = kind match {
    case AllAxioms | AnnotationAxioms => true
    case LogicalAxioms                => false
  }
  (includeLogical, includeAnnotations)
}

private def createReadableIdentifierIndex(dosdp: ExpandedDOSDP, ont: OWLOntology): Map[IRI, Map[IRI, String]] = {
val properties = dosdp.readableIdentifierProperties.to(Set)
Expand Down
27 changes: 14 additions & 13 deletions src/main/scala/org/monarchinitiative/dosdp/cli/Query.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package org.monarchinitiative.dosdp.cli

import java.io.{File, PrintWriter}

import com.github.tototoshi.csv.{CSVWriter, TSVFormat}
import com.github.tototoshi.csv.CSVWriter
import org.apache.jena.query.{QueryExecutionFactory, QueryFactory, QuerySolution}
import org.apache.jena.rdf.model.ModelFactory
import org.monarchinitiative.dosdp.Utilities.isDirectory
import org.monarchinitiative.dosdp.{ExpandedDOSDP, SPARQL, SesameJena}
import org.monarchinitiative.dosdp.cli.Config.AxiomKind
import org.monarchinitiative.dosdp.{DOSDP, ExpandedDOSDP, SPARQL, SesameJena}
import org.phenoscape.owlet.Owlet
import org.semanticweb.HermiT.ReasonerFactory
import org.semanticweb.elk.owlapi.ElkReasonerFactory
Expand Down Expand Up @@ -36,7 +37,8 @@ object Query {
ontologyOpt <- config.common.ontologyOpt
_ <- makeOptionalReasoner(ontologyOpt, reasonerFactoryOpt).use { reasonerOpt =>
ZIO.foreach(targets) { target =>
makeProcessedQuery(target, config, reasonerOpt).flatMap(processTarget(target, config, _, ontologyOpt))
ZIO.effectTotal(scribe.info(s"Processing pattern ${target.templateFile}")) *>
createQuery(target, config, reasonerOpt).flatMap(processTarget(target, config, _, ontologyOpt))
}
}
} yield ()
Expand All @@ -51,16 +53,17 @@ object Query {
.toManaged(o => ZIO.effectTotal(o.dispose())))(identity)
}

private def makeProcessedQuery(target: QueryTarget, config: QueryConfig, reasonerOpt: Option[OWLReasoner]): ZIO[Any, DOSDPError, String] = {
private def createQuery(target: QueryTarget, config: QueryConfig, reasonerOpt: Option[OWLReasoner]): ZIO[Any, DOSDPError, String] =
for {
_ <- ZIO.effectTotal(scribe.info(s"Processing pattern ${target.templateFile}"))
dosdp <- Config.inputDOSDPFrom(target.templateFile)
prefixes <- config.common.prefixesMap
sparqlQuery = SPARQL.queryFor(ExpandedDOSDP(dosdp, prefixes))
processedQuery = reasonerOpt.map { reasoner =>
new Owlet(reasoner).expandQueryString(sparqlQuery)
}.getOrElse(sparqlQuery)
} yield processedQuery
} yield makeProcessedQuery(dosdp, prefixes, config.restrictAxiomsTo, reasonerOpt)

/**
 * Builds the SPARQL query for a pattern and, when a reasoner is supplied,
 * passes it through Owlet's `expandQueryString`.
 *
 * @param dosdp       the pattern to query for
 * @param prefixes    prefix mappings used to expand the pattern
 * @param axiomKind   which axioms the query should cover
 * @param reasonerOpt optional reasoner; when absent the query is returned as generated
 * @return the query text
 */
def makeProcessedQuery(dosdp: DOSDP, prefixes: PartialFunction[String, String], axiomKind: AxiomKind, reasonerOpt: Option[OWLReasoner]): String = {
  val baseQuery = SPARQL.queryFor(ExpandedDOSDP(dosdp, prefixes), axiomKind)
  reasonerOpt match {
    case Some(reasoner) => new Owlet(reasoner).expandQueryString(baseQuery)
    case None           => baseQuery
  }
}

private def processTarget(target: QueryTarget, config: QueryConfig, processedQuery: String, ontologyOpt: Option[OWLOntology]): ZIO[Any, DOSDPError, Unit] = {
Expand All @@ -73,8 +76,7 @@ object Query {
_ <- ZIO.effect(CSVWriter.open(target.outputFile, "utf-8")(sepFormat))
.bracketAuto(w => writeQueryResults(w, columns, results))
} yield ()
ZIO.effectTotal(scribe.info(s"Processing pattern ${target.templateFile}")) *>
(if (config.printQuery.bool) doPrintQuery else doPerformQuery).mapError(e => DOSDPError("Failure performing query command", e))
(if (config.printQuery.bool) doPrintQuery else doPerformQuery).mapError(e => DOSDPError("Failure performing query command", e))
}

private def writeQueryResults(writer: CSVWriter, columns: List[String], results: List[QuerySolution]) =
Expand All @@ -83,7 +85,6 @@ object Query {
}

private def determineTargets(config: QueryConfig): RIO[Blocking, List[QueryTarget]] = {
val sepFormat = Config.tabularFormat(config.common.tableFormat)
val patternNames = config.common.batchPatterns.items
if (patternNames.nonEmpty) for {
_ <- ZIO.effectTotal(scribe.info("Running in batch mode"))
Expand Down
Loading

0 comments on commit 183f8ca

Please sign in to comment.