Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Proposal]: Add UUID conversion to and from 16 byte fixed sequences #100

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
jobs:
test:
name: Build and test
runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v2
Expand All @@ -35,7 +35,7 @@ jobs:

dialyzer:
name: Run Dialyzer for type checking
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
- name: Set mix file hash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:

jobs:
Publish:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
env:
HEX_API_KEY: ${{ secrets.HEXPM_SECRET }}
steps:
Expand Down
21 changes: 21 additions & 0 deletions lib/avro_ex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ defmodule AvroEx do
of blocks with their counts. This allows consumers of the encoded data to skip
over those blocks in an efficient manner. Using the option `include_block_byte_size: true`
enables adding those additional values.

## UUID encoding

UUIDs can be encoded as strings using the canonical hex representation, which takes
37 bytes (36 characters plus a one-byte length prefix).
Alternatively, encoding UUIDs in their 16 byte binary representation is much
more compact, saving 21 bytes per encoding.
See "UUIDs" on `decode/3` for how to convert binary representations back to
canonical strings during decoding.
"""
@spec encode(Schema.t(), term, keyword()) ::
{:ok, encoded_avro} | {:error, AvroEx.EncodeError.t() | Exception.t()}
Expand Down Expand Up @@ -185,6 +193,19 @@ defmodule AvroEx do

Otherwise, an approximate number is calculated.

## UUIDs

When decoding a 16 byte fixed quantity with logical type "uuid", specify
`uuid_format: :binary` (the default) to retain the binary representation or
`uuid_format: :canonical_string` to convert it to the canonical hexadecimal string representation.

iex> schema = AvroEx.decode_schema!(~S({"type": "fixed", "size": 16, "name": "fixed_uuid", "logicalType":"uuid"}))
iex> binary_uuid = <<85, 14, 132, 0, 226, 155, 65, 212, 167, 22, 68, 102, 85, 68, 0, 0>>
iex> AvroEx.decode(schema, binary_uuid, uuid_format: :binary)
{:ok, binary_uuid}
iex> AvroEx.decode(schema, binary_uuid, uuid_format: :canonical_string)
{:ok, "550e8400-e29b-41d4-a716-446655440000"}

"""
@spec decode(Schema.t(), encoded_avro, keyword()) ::
{:ok, term}
Expand Down
19 changes: 19 additions & 0 deletions lib/avro_ex/decode.ex
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,25 @@ defmodule AvroEx.Decode do
{:lists.nth(index + 1, symbols), rest}
end

# Decodes a 16-byte fixed value whose schema metadata carries the "uuid" logical type.
#
# The `:uuid_format` option (default `:binary`) selects the returned representation:
#   * `:binary`           - the 16 bytes exactly as they were read
#   * `:canonical_string` - the bytes parsed with `Uniq.UUID.parse/1` and rendered
#                           with `Uniq.UUID.to_string/2` using the `:default` format
#
# Returns `{value, rest}`, where `rest` is whatever follows the 16 bytes in `data`.
# NOTE(review): any other `:uuid_format` value has no matching `case` branch below.
defp do_decode(%Fixed{size: size = 16, metadata: %{"logicalType" => "uuid"}}, %Context{}, data, opts)
when is_binary(data) do
# Split off exactly `size` (16) bytes; the remainder is handed back to the caller.
<<fixed::binary-size(size), rest::binary>> = data

case Keyword.get(opts, :uuid_format, :binary) do
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without any opts-based configuration, the change would be backwards incompatible.
I'll gladly accept input on whether configuration is necessary at all and if so, the key and value names.

:binary ->
{fixed, rest}

:canonical_string ->
case Uniq.UUID.parse(fixed) do
{:ok, uuid} ->
{Uniq.UUID.to_string(uuid, :default), rest}

# Any result other than `{:ok, uuid}` is reported through error/1 as :invalid_binary_uuid.
_ ->
error({:invalid_binary_uuid, fixed})
end
end
end

defp do_decode(%Fixed{size: size}, %Context{}, data, _) when is_binary(data) do
<<fixed::binary-size(size), rest::binary>> = data
{fixed, rest}
Expand Down
5 changes: 5 additions & 0 deletions lib/avro_ex/decode_error.ex
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,9 @@ defmodule AvroEx.DecodeError do
message = "Invalid UTF-8 string found #{inspect(str)}."
%__MODULE__{message: message}
end

# Builds the exception struct for an `:invalid_binary_uuid` error; the offending
# binary is embedded in the message in its inspected form.
def new({:invalid_binary_uuid, binary_uuid}) do
  %__MODULE__{message: "Invalid binary UUID found #{inspect(binary_uuid)}."}
end
end
5 changes: 5 additions & 0 deletions lib/avro_ex/encode.ex
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ defmodule AvroEx.Encode do
bin
end

# Encodes a binary for a 16-byte fixed schema tagged with the "uuid" logical type.
#
# A textual UUID is converted to its binary form with `Uniq.UUID.string_to_binary!/1`
# and then encoded again as a plain fixed value.
# NOTE(review): this relies on an earlier clause accepting binaries that already
# have the schema's size - confirm against the clauses above.
#
# Input that Uniq does not recognise as a UUID is reported through this module's
# own `error/1`, so callers see the library's encode error instead of the
# exception raised by `Uniq.UUID.string_to_binary!/1`.
defp do_encode(%Fixed{size: 16, metadata: %{"logicalType" => "uuid"}} = fixed, %Context{} = context, bin, opts)
     when is_binary(bin) do
  if Uniq.UUID.valid?(bin) do
    do_encode(fixed, context, Uniq.UUID.string_to_binary!(bin), opts)
  else
    error({:incorrect_fixed_size, fixed, bin, context})
  end
end

# Any other binary given for a fixed schema is reported as a size mismatch.
defp do_encode(%Fixed{} = fixed, %Context{} = context, bin, _) when is_binary(bin) do
  error({:incorrect_fixed_size, fixed, bin, context})
end
Expand Down
5 changes: 5 additions & 0 deletions lib/avro_ex/schema/fixed.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,10 @@ defmodule AvroEx.Schema.Fixed do
true
end

# A binary offered for a 16-byte fixed schema with the "uuid" logical type
# matches when `Uniq.UUID.valid?/1` accepts it.
def match?(%__MODULE__{size: 16, metadata: %{"logicalType" => "uuid"}}, %Context{}, data)
    when is_binary(data),
    do: Uniq.UUID.valid?(data)

# Anything not accepted by a preceding clause does not match.
def match?(_fixed, _context, _data) do
  false
end
end
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ defmodule AvroEx.Mixfile do
{:dialyxir, "~> 1.1", only: :dev, runtime: false},
{:ex_doc, "~> 0.20", only: :dev, runtime: false},
{:stream_data, "~> 0.5", only: [:dev, :test]},
{:decimal, "~> 2.0", optional: true}
{:decimal, "~> 2.0", optional: true},
{:uniq, "~> 0.6"}
]
end

Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@
"nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"},
"stream_data": {:hex, :stream_data, "0.5.0", "b27641e58941685c75b353577dc602c9d2c12292dd84babf506c2033cd97893e", [:mix], [], "hexpm", "012bd2eec069ada4db3411f9115ccafa38540a3c78c4c0349f151fc761b9e271"},
"typed_struct": {:hex, :typed_struct, "0.3.0", "939789e3c1dca39d7170c87f729127469d1315dcf99fee8e152bb774b17e7ff7", [:mix], [], "hexpm", "c50bd5c3a61fe4e198a8504f939be3d3c85903b382bde4865579bc23111d1b6d"},
"uniq": {:hex, :uniq, "0.6.1", "369660ecbc19051be526df3aa85dc393af5f61f45209bce2fa6d7adb051ae03c", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm", "6426c34d677054b3056947125b22e0daafd10367b85f349e24ac60f44effb916"},
}
27 changes: 27 additions & 0 deletions test/decode_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,20 @@ defmodule AvroEx.Decode.Test do
"decimalField4" => 5.3e-11
}
end

test "16 byte fixed uuid" do
  schema_json = ~S({"type": "fixed", "size": 16, "name": "fixed_uuid", "logicalType":"uuid"})
  {:ok, fixed_uuid_schema} = AvroEx.decode_schema(schema_json)

  # Example from https://en.wikipedia.org/wiki/Universally_unique_identifier#Textual_representation
  uuid_bytes = :binary.encode_unsigned(113_059_749_145_936_325_402_354_257_176_981_405_696)
  uuid_text = "550e8400-e29b-41d4-a716-446655440000"

  # The raw bytes come back untouched, whether :binary is requested or left as the default.
  assert {:ok, ^uuid_bytes} = AvroEx.decode(fixed_uuid_schema, uuid_bytes, uuid_format: :binary)
  assert {:ok, ^uuid_bytes} = AvroEx.decode(fixed_uuid_schema, uuid_bytes)

  # Asking for the canonical string yields the hyphenated hex form.
  assert {:ok, ^uuid_text} = AvroEx.decode(fixed_uuid_schema, uuid_bytes, uuid_format: :canonical_string)
end
end

describe "DecodingError" do
Expand All @@ -354,5 +368,18 @@ defmodule AvroEx.Decode.Test do
AvroEx.decode!(schema, <<"\nhell", 0xFFFF::16>>)
end
end

test "invalid fixed uuid" do
  schema_json = ~S({"type": "fixed", "size": 16, "name": "fixed_uuid", "logicalType":"uuid"})
  {:ok, fixed_uuid_schema} = AvroEx.decode_schema(schema_json)

  # Sixteen 0x01 bytes: the right length for the fixed type, but expected to be
  # rejected when converted to a canonical UUID string.
  non_uuid_binary = :binary.copy(<<1>>, 16)

  expected_message = "Invalid binary UUID found <<1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>>."

  assert_raise DecodeError, expected_message, fn ->
    AvroEx.decode!(fixed_uuid_schema, non_uuid_binary, uuid_format: :canonical_string)
  end
end
end
end
58 changes: 39 additions & 19 deletions test/encode_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,19 @@ defmodule AvroEx.Encode.Test do
"decimalField4" => 5.3e-11
}
end

test "16 byte fixed uuid" do
  schema =
    AvroEx.decode_schema!(%{"type" => "fixed", "size" => 16, "name" => "fixed_uuid", "logicalType" => "uuid"})

  assert %AvroEx.Schema{} = schema

  # Example from https://en.wikipedia.org/wiki/Universally_unique_identifier#Textual_representation
  uuid_bytes = :binary.encode_unsigned(113_059_749_145_936_325_402_354_257_176_981_405_696)
  uuid_text = "550e8400-e29b-41d4-a716-446655440000"

  # Both the textual and the already-binary form must encode to the same 16 bytes.
  assert {:ok, ^uuid_bytes} = AvroEx.encode(schema, uuid_text)
  assert {:ok, ^uuid_bytes} = AvroEx.encode(schema, uuid_bytes)
end
end

describe "variable_integer_encode" do
Expand Down Expand Up @@ -282,51 +295,50 @@ defmodule AvroEx.Encode.Test do
end

describe "encode (union)" do
# Encodes `index` with the "int" schema; the union tests prepend the result to
# the encoded branch value to build the expected union encoding.
defp union_index(index) do
  {:ok, int_schema} = AvroEx.decode_schema(~S("int"))
  {:ok, encoded_index} = @test_module.encode(int_schema, index)
  encoded_index
end

test "works as expected with nulls" do
{:ok, schema} = AvroEx.decode_schema(~S(["null", "int"]))
{:ok, null_schema} = AvroEx.decode_schema(~S("null"))
{:ok, int_schema} = AvroEx.decode_schema(~S("int"))

{:ok, index} = @test_module.encode(int_schema, 0)
{:ok, encoded_null} = @test_module.encode(null_schema, nil)
{:ok, encoded_union} = @test_module.encode(schema, nil)

assert encoded_union == index <> encoded_null
assert encoded_union == union_index(0) <> encoded_null
end

test "works as expected with ints" do
{:ok, schema} = AvroEx.decode_schema(~S(["null", "int"]))
{:ok, int_schema} = AvroEx.decode_schema(~S("int"))

{:ok, index} = @test_module.encode(int_schema, 1)
{:ok, encoded_int} = @test_module.encode(int_schema, 2086)
{:ok, encoded_union} = @test_module.encode(schema, 2086)

assert encoded_union == index <> encoded_int
assert encoded_union == union_index(1) <> encoded_int
end

test "works as expected with int and long" do
{:ok, schema} = AvroEx.decode_schema(~S(["int", "long"]))
{:ok, int_schema} = AvroEx.decode_schema(~S("int"))
{:ok, long_schema} = AvroEx.decode_schema(~S("long"))

{:ok, index} = @test_module.encode(int_schema, 1)
{:ok, encoded_long} = @test_module.encode(long_schema, -3_376_656_585_598_455_353)
{:ok, encoded_union} = @test_module.encode(schema, -3_376_656_585_598_455_353)

assert encoded_union == index <> encoded_long
assert encoded_union == union_index(1) <> encoded_long
end

test "works as expected with float and double" do
{:ok, schema} = AvroEx.decode_schema(~S(["float", "double"]))
{:ok, int_schema} = AvroEx.decode_schema(~S("int"))
{:ok, double_schema} = AvroEx.decode_schema(~S("double"))

{:ok, index} = @test_module.encode(int_schema, 1)
{:ok, encoded_long} = @test_module.encode(double_schema, 0.0000000001)
{:ok, encoded_union} = @test_module.encode(schema, 0.0000000001)

assert encoded_union == index <> encoded_long
assert encoded_union == union_index(1) <> encoded_long
end

test "works as expected with logical types" do
Expand All @@ -336,11 +348,23 @@ defmodule AvroEx.Encode.Test do
{:ok, schema} = AvroEx.decode_schema(~s(["null", #{datetime_json}]))
{:ok, datetime_schema} = AvroEx.decode_schema(datetime_json)

{:ok, index} = @test_module.encode(datetime_schema, 1)
{:ok, encoded_datetime} = @test_module.encode(datetime_schema, datetime_value)
{:ok, encoded_union} = @test_module.encode(schema, datetime_value)

assert encoded_union == index <> encoded_datetime
assert encoded_union == union_index(1) <> encoded_datetime
end

test "works as expected with 16 byte fixed UUID logical types" do
  uuid_value = "550e8400-e29b-41d4-a716-446655440000"
  fixed_uuid_json = ~S({"type": "fixed", "size": 16, "name": "fixed_uuid", "logicalType": "uuid"})

  # The same fixed schema is used on its own and as the second branch of a union.
  {:ok, fixed_uuid_schema} = AvroEx.decode_schema(fixed_uuid_json)
  {:ok, union_schema} = AvroEx.decode_schema(~s(["null", #{fixed_uuid_json}]))

  {:ok, encoded_union} = @test_module.encode(union_schema, uuid_value)
  {:ok, encoded_uuid} = @test_module.encode(fixed_uuid_schema, uuid_value)

  # Expected union encoding: branch position 1 followed by the branch's own encoding.
  assert encoded_union == union_index(1) <> encoded_uuid
end

test "works as expected with records" do
Expand All @@ -358,14 +382,12 @@ defmodule AvroEx.Encode.Test do
json_schema = ~s(["null", #{record_json}])

{:ok, schema} = AvroEx.decode_schema(json_schema)
{:ok, int_schema} = AvroEx.decode_schema(~S("int"))
{:ok, record_schema} = AvroEx.decode_schema(record_json)

{:ok, index} = @test_module.encode(int_schema, 1)
{:ok, encoded_record} = @test_module.encode(record_schema, %{"a" => 25, "b" => "hello"})
{:ok, encoded_union} = @test_module.encode(schema, %{"a" => 25, "b" => "hello"})

assert encoded_union == index <> encoded_record
assert encoded_union == union_index(1) <> encoded_record
end

test "works as expected with union values tagged for a named possibility" do
Expand All @@ -384,14 +406,12 @@ defmodule AvroEx.Encode.Test do
json_schema = ~s([#{record_json_factory.("a")}, #{record_json_factory.("b")}])

{:ok, schema} = AvroEx.decode_schema(json_schema)
{:ok, int_schema} = AvroEx.decode_schema(~S("int"))
{:ok, record_schema} = AvroEx.decode_schema(record_json_factory.("b"))

{:ok, index} = @test_module.encode(int_schema, 1)
{:ok, encoded_record} = @test_module.encode(record_schema, %{"value" => "hello"})
{:ok, encoded_union} = @test_module.encode(schema, {"b", %{"value" => "hello"}})

assert encoded_union == index <> encoded_record
assert encoded_union == union_index(1) <> encoded_record
end

test "errors with a clear error for tagged unions" do
Expand Down
Loading