Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.2.2 #93

Merged
merged 9 commits into from
Nov 23, 2024
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Slack](https://img.shields.io/static/v1?logo=slack&logoColor=959DA5&label=Slack&

-----

**Latest Stable Release**: 2.2.1
**Latest Stable Release**: 2.2.2

-----

Expand Down
29 changes: 29 additions & 0 deletions release-notes/2.2.2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Minitrino Release Notes: 2.2.2

## Release Overview

- [Minitrino Release Notes: 2.2.2](#minitrino-release-notes-222)
- [Release Overview](#release-overview)
- [CLI Changes and Additions](#cli-changes-and-additions)
- [Library Changes and Additions](#library-changes-and-additions)
- [Other](#other)

## CLI Changes and Additions

- Removed the enormous and unsightly environment variable string that preceded
`docker compose up` commands. It is no longer required because all Minitrino
and shell environment variables are sourced and passed to the subprocess that
executes the command.
- The `remove` command now logs removed images and volumes at the `info` level,
meaning `-v` is no longer a required argument to see which objects are
removed.
- Added the full command output to the error log for commands that fail.

## Library Changes and Additions

- Added named volumes for data persistence to all catalog modules (except `Db2`)
and modified the relevant bootstrap scripts to be idempotent.

## Other

- N/A
8 changes: 1 addition & 7 deletions src/cli/minitrino/cmd/cmd_provision.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def cli(ctx, modules, workers, no_rollback, docker_native):
cmd_chunk = chunk(modules)
compose_cmd = build_command(docker_native, cmd_chunk)

ctx.cmd_executor.execute_commands(compose_cmd, environment=ctx.env)
ctx.cmd_executor.execute_commands(compose_cmd, environment=ctx.env.copy())

c_restart = execute_bootstraps(modules)
c_restart = write_trino_cfg(c_restart, modules)
Expand Down Expand Up @@ -253,14 +253,8 @@ def build_command(ctx, docker_native="", chunk=""):
docker compose command string."""

cmd = []
compose_env_string = ""
for k, v in ctx.env.items():
compose_env_string += f'{k.upper()}="{v}" '

cmd.extend(
[
compose_env_string,
"\\\n",
"docker compose -f ",
os.path.join(ctx.minitrino_lib_dir, "docker-compose.yaml"),
" \\\n",
Expand Down
4 changes: 2 additions & 2 deletions src/cli/minitrino/cmd/cmd_remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def remove_items(ctx, item_type, force, labels=[]):
)
else:
ctx.docker_client.images.remove(image.short_id)
ctx.logger.verbose(
ctx.logger.info(
f"{item_type.title()} removed: {identifier}",
)
except APIError as e:
Expand All @@ -124,7 +124,7 @@ def remove_items(ctx, item_type, force, labels=[]):
volume.remove(force=True)
else:
volume.remove()
ctx.logger.verbose(
ctx.logger.info(
f"{item_type.title()} removed: {identifier}",
)
except APIError as e:
Expand Down
12 changes: 8 additions & 4 deletions src/cli/minitrino/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _user_init(self, verbose=False, user_env=[]):

self.env._log_env_vars()
self.cmd_executor = CommandExecutor(self)
self._get_docker_clients(env=self.env)
self._get_docker_clients(env=self.env.copy())

def _handle_minitrino_user_dir(self):
"""Checks if a Minitrino directory exists in the user home directory.
Expand Down Expand Up @@ -543,7 +543,8 @@ def _execute_in_shell(self, command="", **kwargs):
if process.returncode != 0 and kwargs.get("trigger_error", True):
raise err.MinitrinoError(
f"Failed to execute shell command:\n{command}\n"
f"Exit code: {process.returncode}"
f"Exit code: {process.returncode}\n"
f"Command output: {self._strip_ansi(output)}"
)

return {
Expand Down Expand Up @@ -620,13 +621,16 @@ def _execute_in_container(self, command="", **kwargs):
self._ctx.logger.warn(
f"The command exited with a 126 code which typically means an "
f"executable is not accessible or installed. Does this image have "
f"all required dependencies installed?\nCommand: {command}",
f"all required dependencies installed?\n"
f"Command: {command}\n"
f"Command output: {output}"
)

if exit_code != 0 and kwargs.get("trigger_error", True):
raise err.MinitrinoError(
f"Failed to execute command in container '{container.name}':\n{command}\n"
f"Exit code: {exit_code}"
f"Exit code: {exit_code}\n"
f"Command output: {output}"
)

return {"command": command, "output": output, "exit_code": exit_code}
Expand Down
2 changes: 1 addition & 1 deletion src/cli/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name="minitrino",
version="2.2.1",
version="2.2.2",
description="A command line tool that makes it easy to run modular Trino environments locally.",
long_description=README,
long_description_content_type="text/markdown",
Expand Down
7 changes: 7 additions & 0 deletions src/lib/modules/catalog/clickhouse/clickhouse.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ services:
ports:
- :8123
volumes:
- clickhouse-data:/var/lib/clickhouse
- ./modules/catalog/clickhouse/resources/clickhouse/init.sh:/docker-entrypoint-initdb.d/init.sh
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.clickhouse=catalog-clickhouse

volumes:
clickhouse-data:
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.clickhouse=catalog-clickhouse
33 changes: 33 additions & 0 deletions src/lib/modules/catalog/clickhouse/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,36 @@ trino-cli

trino> SHOW TABLES IN clickhouse.minitrino;
```

## Persistent Storage

This module uses named volumes to persist ClickHouse data:

```yaml
volumes:
clickhouse-data:
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.clickhouse=catalog-clickhouse
```

The user-facing implication is that ClickHouse data is retained even after
shutting down and/or removing the environment's containers. Minitrino issues a
warning about this whenever a module with named volumes is deployed—be sure to
look out for these warnings:

```log
[w] Module '<module>' has persistent volumes associated with it. To delete these volumes, remember to run `minitrino remove --volumes`.
```

To remove these volumes, run:

```sh
minitrino -v remove --volumes --label com.starburst.tests.module.clickhouse=catalog-clickhouse
```

Or, remove them directly using the Docker CLI:

```sh
docker volume rm minitrino_clickhouse-data
```
15 changes: 9 additions & 6 deletions src/lib/modules/catalog/clickhouse/resources/clickhouse/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,30 @@ clickhouse client -n <<-EOSQL
) ENGINE = MergeTree()
ORDER BY id;

-- Insert random data into table1
-- Insert random data into table1 only if there are fewer than 1000 rows
INSERT INTO minitrino.table1
SELECT
number AS id,
concat('Name_', toString(number % 100)) AS name,
rand() % 10000 / 100.0 AS value
FROM numbers(1000);
FROM numbers(1000)
WHERE (SELECT count() FROM minitrino.table1) < 1000;

-- Insert random data into table2
-- Insert random data into table2 only if there are fewer than 1000 rows
INSERT INTO minitrino.table2
SELECT
number AS id,
concat('Category_', toString(rand() % 10)) AS category,
rand() % 5000 / 100.0 AS amount
FROM numbers(1000);
FROM numbers(1000)
WHERE (SELECT count() FROM minitrino.table2) < 1000;

-- Insert random data into table3
-- Insert random data into table3 only if there are fewer than 1000 rows
INSERT INTO minitrino.table3
SELECT
number AS id,
now() - number * 60 AS timestamp,
rand() % 2 AS is_active
FROM numbers(1000);
FROM numbers(1000)
WHERE (SELECT count() FROM minitrino.table3) < 1000;
EOSQL
8 changes: 8 additions & 0 deletions src/lib/modules/catalog/elasticsearch/elasticsearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ services:
ports:
- 9200:9200
- 9300:9300
volumes:
- elasticsearch-data:/usr/share/elasticsearch/data
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.elasticsearch=catalog-elasticsearch

volumes:
elasticsearch-data:
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.elasticsearch=catalog-elasticsearch
33 changes: 33 additions & 0 deletions src/lib/modules/catalog/elasticsearch/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,36 @@ by this module, located at:
```sh
lib/modules/catalog/elasticsearch/resources/bootstrap/bootstrap-elasticsearch.sh
```

## Persistent Storage

This module uses named volumes to persist Elasticsearch (ES) data:

```yaml
volumes:
elasticsearch-data:
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.elasticsearch=catalog-elasticsearch
```

The user-facing implication is that ES data is retained even after shutting down
and/or removing the environment's containers. Minitrino issues a warning about
this whenever a module with named volumes is deployed—be sure to look out for
these warnings:

```log
[w] Module '<module>' has persistent volumes associated with it. To delete these volumes, remember to run `minitrino remove --volumes`.
```

To remove these volumes, run:

```sh
minitrino -v remove --volumes --label com.starburst.tests.module.elasticsearch=catalog-elasticsearch
```

Or, remove them directly using the Docker CLI:

```sh
docker volume rm minitrino_elasticsearch-data
```
Original file line number Diff line number Diff line change
@@ -1,38 +1,39 @@
#!/usr/bin/env bash

#-----------------------------------------------------------------------------------------------
# https://github.com/andrewpuch/elasticsearch_examples
#-----------------------------------------------------------------------------------------------

set -euxo pipefail

echo "Waiting for Elasticsearch to come up..."
wait-for-it elasticsearch:9200 --strict --timeout=60 -- echo "Elasticsearch service is up."

echo "Creating user index..."
curl -XPUT -H 'Content-Type: application/json' http://elasticsearch:9200/user?pretty=true -d'
{
"settings" : {
"index" : {
"number_of_replicas" : 0
echo "Checking if 'user' index exists..."
if curl -s http://elasticsearch:9200/_cat/indices | grep -q 'user'; then
echo "'user' index already exists. Skipping index creation and mapping."
else
echo "Creating 'user' index..."
curl -XPUT -H 'Content-Type: application/json' http://elasticsearch:9200/user?pretty=true -d'
{
"settings" : {
"index" : {
"number_of_replicas" : 0
}
}
}
}'
}'

echo "Creating user mapping..."
curl -XPUT 'http://elasticsearch:9200/user/_mapping' -H 'Content-Type: application/json' -d '
{
"properties" : {
"full_name" : { "type" : "text", "store" : true },
"bio" : { "type" : "text", "store" : true },
"age" : { "type" : "integer" },
"location" : { "type" : "text" },
"enjoys_coffee" : { "type" : "boolean" },
"created_on" : { "type" : "date" }
}
}
';
echo "Creating 'user' mapping..."
curl -XPUT 'http://elasticsearch:9200/user/_mapping' -H 'Content-Type: application/json' -d '
{
"properties" : {
"full_name" : { "type" : "text", "store" : true },
"bio" : { "type" : "text", "store" : true },
"age" : { "type" : "integer" },
"location" : { "type" : "text" },
"enjoys_coffee" : { "type" : "boolean" },
"created_on" : { "type" : "date" }
}
}'
fi

echo "Generating and inserting sample data..."
sudo pip install faker requests

cat << EOF > /tmp/generate_es_users.py
Expand All @@ -53,12 +54,14 @@ for i in range(1, 500):
}

response = requests.post(
f"http://elasticsearch:9200/user/_doc/{i}?pretty=true",
f"http://elasticsearch:9200/user/_doc/{i}",
headers={"Content-Type": "application/json"},
data=json.dumps(user)
)

print(f"Created user {i}, response: {response.status_code}")
if response.status_code == 409:
print(f"User {i} already exists. Skipping.")
else:
print(f"Created user {i}, response: {response.status_code}")
EOF

# Make the Python script executable
Expand Down
7 changes: 7 additions & 0 deletions src/lib/modules/catalog/iceberg/iceberg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@ services:
AWS_ACCESS_KEY_ID: access-key
AWS_SECRET_ACCESS_KEY: secret-key
AWS_REGION: us-east-1
CATALOG_URI: jdbc:sqlite:/home/iceberg/iceberg.db
CATALOG_WAREHOUSE: s3://sample-bucket/wh/
CATALOG_IO__IMPL: org.apache.iceberg.aws.s3.S3FileIO
CATALOG_S3_ENDPOINT: http://s3.us-east-1.minio.com:9000
ports:
- 8181:8181
volumes:
- iceberg-metadata:/home/iceberg
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.iceberg=catalog-iceberg
Expand Down Expand Up @@ -89,6 +92,10 @@ services:
- com.starburst.tests.module.iceberg=catalog-iceberg

volumes:
iceberg-metadata:
labels:
- com.starburst.tests=minitrino
- com.starburst.tests.module.iceberg=catalog-iceberg
minio-iceberg-data:
labels:
- com.starburst.tests=minitrino
Expand Down
Loading
Loading