Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate Manifold #24

Merged
merged 4 commits into from
Feb 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions lib/manifold/api/schema_generator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# frozen_string_literal: true

module Manifold
module API
# Builds the BigQuery table schemas (dimensions and manifold) for a workspace
# from its dimension fields and its parsed manifold configuration.
class SchemaGenerator
  VALID_OPERATORS = %w[AND OR NOT NAND NOR XOR XNOR].freeze

  # dimensions_fields - field definitions nested under the "dimensions" record.
  # manifold_yaml     - parsed manifold configuration; its "contexts" and
  #                     "metrics" entries drive the metrics record.
  def initialize(dimensions_fields, manifold_yaml)
    @manifold_yaml = manifold_yaml
    @dimensions_fields = dimensions_fields
  end

  # Schema of the dimensions table: a required id plus the dimensions record.
  def dimensions_schema
    [id_field, dimensions_record]
  end

  # Schema of the manifold table: id, timestamp, the dimensions record and a
  # metrics record holding one nested record per configured context.
  def manifold_schema
    timestamp_field = { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" }
    metrics_record = { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED",
                       "fields" => metrics_fields }

    [id_field, timestamp_field, dimensions_record, metrics_record]
  end

  private

  # Required string identifier column shared by both tables.
  def id_field
    { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }
  end

  # Required record wrapping the dimension fields given at construction.
  def dimensions_record
    { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
      "fields" => @dimensions_fields }
  end

  # One nullable record per context, each carrying the same set of metric
  # fields. Empty unless both "contexts" and "metrics" are configured.
  def metrics_fields
    contexts = @manifold_yaml["contexts"]
    return [] unless contexts && @manifold_yaml["metrics"]

    contexts.map { |context_name, _context_config| context_record(context_name) }
  end

  # Nullable record for a single context.
  def context_record(context_name)
    { "name" => context_name, "type" => "RECORD", "mode" => "NULLABLE",
      "fields" => context_metrics_fields }
  end

  # Metric fields present in every context: countif first, then sumif.
  def context_metrics_fields
    countif_fields + sumif_fields
  end

  # Single integer field named by metrics.countif, or nothing when unset.
  def countif_fields
    countif_name = @manifold_yaml.dig("metrics", "countif")
    return [] unless countif_name

    [integer_metric(countif_name)]
  end

  # One integer field per key under metrics.sumif, or nothing when unset.
  def sumif_fields
    sumif = @manifold_yaml.dig("metrics", "sumif")
    return [] unless sumif

    sumif.keys.map { |metric_name| integer_metric(metric_name) }
  end

  # Nullable INTEGER field definition with the given name.
  def integer_metric(metric_name)
    { "name" => metric_name, "type" => "INTEGER", "mode" => "NULLABLE" }
  end

  # Raises ArgumentError unless operator is one of VALID_OPERATORS.
  def validate_operator!(operator)
    unless VALID_OPERATORS.include?(operator)
      raise ArgumentError, "Invalid operator: #{operator}. Valid operators are: #{VALID_OPERATORS.join(", ")}"
    end
  end
end
end
end
67 changes: 52 additions & 15 deletions lib/manifold/api/workspace.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,26 @@

module Manifold
module API
# Assembles a workspace's Terraform configuration from its vectors and
# manifold settings, then writes it out.
class TerraformGenerator
  # name           - workspace name handed to the Terraform configuration.
  # vectors        - vectors whose configs are added to the configuration.
  # vector_service - object answering load_vector_config(vector).
  # manifold_yaml  - parsed manifold configuration; "dimensions" is read from it.
  def initialize(name, vectors, vector_service, manifold_yaml)
    @name = name
    @vectors = vectors
    @vector_service = vector_service
    @manifold_yaml = manifold_yaml
  end

  # Builds the configuration and writes it to path, returning whatever the
  # configuration's write call returns.
  def generate(path)
    configuration = Terraform::WorkspaceConfiguration.new(@name)

    # Load and add one vector at a time so loads and adds stay interleaved.
    @vectors.each do |vector_name|
      configuration.add_vector(@vector_service.load_vector_config(vector_name))
    end

    # merge_config is only assigned when a "dimensions" section exists.
    dimensions = @manifold_yaml["dimensions"]
    configuration.merge_config = dimensions.fetch("merge", nil) if dimensions

    configuration.write(path)
  end
end

# Encapsulates a single manifold.
class Workspace
attr_reader :name, :template_path, :logger
Expand All @@ -27,11 +47,17 @@ def add
end

# Generates the workspace's table schema files and, when with_terraform is
# true, its Terraform configuration as well, logging after each step.
# Returns early unless manifold_exists? and any_vectors? both hold.
# NOTE(review): this looks like a diff hunk rendered without +/- markers, so
# superseded lines may appear next to their replacements — confirm against the
# merged file.
def generate(with_terraform: false)
return unless manifold_exists? && any_vectors?
return nil unless manifold_exists? && any_vectors?

# Make sure the output directory exists before any schema file is written.
tables_directory.mkpath
generate_dimensions
generate_terraform if with_terraform
generate_manifold
logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")

return unless with_terraform

generate_terraform
logger.info("Generated Terraform configuration for workspace '#{name}'.")
end

def tables_directory
Expand Down Expand Up @@ -74,16 +100,28 @@ def generate_dimensions
dimensions_path.write(dimensions_schema_json.concat("\n"))
end

# Writes the manifold table schema, as JSON followed by a trailing newline,
# to manifold_schema_path.
def generate_manifold
  destination = manifold_schema_path
  contents = manifold_schema_json.concat("\n")
  destination.write(contents)
end

# Location of the manifold schema file: "manifold.json" inside the tables
# directory.
def manifold_schema_path
  directory = tables_directory
  directory.join("manifold.json")
end

# Memoized SchemaGenerator built from this workspace's dimension fields and
# manifold configuration; created on first use and reused afterwards.
def schema_generator
  return @schema_generator if @schema_generator

  @schema_generator = SchemaGenerator.new(dimensions_fields, manifold_yaml)
end

# Manifold table schema, as produced by the schema generator.
def manifold_schema
  generator = schema_generator
  generator.manifold_schema
end

# Returns the dimensions table schema; the final expression delegates to
# schema_generator.dimensions_schema.
# NOTE(review): this looks like a diff hunk rendered without +/- markers — the
# inline array literal appears to be the superseded body; confirm against the
# merged file.
def dimensions_schema
[
{ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
{ "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
"fields" => dimensions_fields }
]
schema_generator.dimensions_schema
end

def dimensions_fields
vectors.filter_map do |vector|
@dimensions_fields ||= vectors.filter_map do |vector|
logger.info("Loading vector schema for '#{vector}'.")
@vector_service.load_vector_schema(vector)
end
Expand All @@ -106,13 +144,12 @@ def vectors
end

# Produces the workspace's Terraform configuration at terraform_main_path.
# NOTE(review): this looks like a diff hunk rendered without +/- markers — the
# inline WorkspaceConfiguration steps appear to be the superseded body and the
# TerraformGenerator call its replacement; confirm against the merged file.
def generate_terraform
config = Terraform::WorkspaceConfiguration.new(name)
vectors.each do |vector|
vector_config = @vector_service.load_vector_config(vector)
config.add_vector(vector_config)
end
config.merge_config = manifold_yaml["dimensions"]&.fetch("merge", nil) if manifold_yaml["dimensions"]
config.write(terraform_main_path)
terraform_generator = TerraformGenerator.new(name, vectors, @vector_service, manifold_yaml)
terraform_generator.generate(terraform_main_path)
end

# Manifold table schema rendered as pretty-printed JSON.
def manifold_schema_json
  schema = manifold_schema
  JSON.pretty_generate(schema)
end
end
end
Expand Down
35 changes: 18 additions & 17 deletions lib/manifold/templates/workspace_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,24 @@ dimensions:
merge:
source: lib/views/select_my_vector.sql

metrics:
- name: # Add your metric name here, e.g. Pageviews

id:
field: # Identify the field that uniquely identifies each manifold vector
type: # Specify the type of that field, e.g. INTEGER
timestamp:
interval: HOUR
field: timestamp

interval:
type: # Specify the interval type, e.g. TIMESTAMP or DATE
expression: # Compute the interval for the entry, e.g. TIMESTAMP_TRUNC(timestamp, HOUR)
contexts:
paid: IS_PAID(context.location)
organic: IS_ORGANIC(context.location)
paidOrganic:
fields:
- paid
- organic
operator: AND

aggregations:
# Add any aggregations this metric should present
metrics:
countif: tapCount
sumif:
sequenceSum:
field: context.sequence

source:
type: BIGQUERY_TABLE
project: # Add your project name here
dataset: # Add your dataset name here
table: # Add your table name
filter: # (optional) Add your filter condition here
source: my_project.my_dataset.my_table
filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
29 changes: 22 additions & 7 deletions lib/manifold/terraform/workspace_configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,28 @@ def dataset_config

# Table definitions keyed by table name ("dimensions" and "manifold").
# NOTE(review): this looks like a diff hunk rendered without +/- markers — the
# inline "dimensions" hash appears to be the superseded entry and the helper
# calls its replacement; confirm against the merged file.
def table_config
{
"dimensions" => {
"dataset_id" => name,
"project" => "${var.project_id}",
"table_id" => "Dimensions",
"schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
"depends_on" => ["google_bigquery_dataset.#{name}"]
}
"dimensions" => dimensions_table_config,
"manifold" => manifold_table_config
}
end

# Settings for the "Dimensions" table: it lives in the dataset named after
# this configuration, reads its schema from tables/dimensions.json and
# depends on that dataset resource.
def dimensions_table_config
  dataset = name
  dataset_resource = "google_bigquery_dataset.#{dataset}"

  {
    "dataset_id" => dataset,
    "project" => "${var.project_id}",
    "table_id" => "Dimensions",
    "schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
    "depends_on" => [dataset_resource]
  }
end

# Settings for the "Manifold" table: it lives in the dataset named after this
# configuration, reads its schema from tables/manifold.json and depends on
# that dataset resource.
def manifold_table_config
  dataset = name
  dataset_resource = "google_bigquery_dataset.#{dataset}"

  {
    "dataset_id" => dataset,
    "project" => "${var.project_id}",
    "table_id" => "Manifold",
    "schema" => "${file(\"${path.module}/tables/manifold.json\")}",
    "depends_on" => [dataset_resource]
  }
end

Expand Down
2 changes: 1 addition & 1 deletion lib/manifold/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

# Top-level namespace; VERSION holds the library's version string.
# NOTE(review): this looks like a diff hunk rendered without +/- markers, so
# the previous and the new VERSION assignment both appear — confirm against
# the merged file.
module Manifold
VERSION = "0.0.14"
VERSION = "0.0.15"
end
Loading