Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add stac-geoparquet reading and writing #1521

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Added

- Optional `geoparquet` feature to read and write item collections as stac-geoparquet ([#1521](https://github.com/stac-utils/pystac/pull/1521))

### Fixed

- Make sure that `VersionRange` has `VersionID`s rather than strings ([#1512](https://github.com/stac-utils/pystac/pull/1512))
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ objects you'll need [`jinja2`](https://pypi.org/project/Jinja2/)
python -m pip install 'pystac[jinja2]'
```

If you want to read and write item collections as **stac-geoparquet**:

```shell
python -m pip install 'pystac[geoparquet]'
```

### Install from source

```shell
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = ["python-dateutil>=2.7.0"]
dynamic = ["version"]

[project.optional-dependencies]
geoparquet = ["stacrs>=0.5.1"]
jinja2 = ["jinja2<4.0"]
orjson = ["orjson>=3.5"]
urllib3 = ["urllib3>=1.26"]
Expand Down
50 changes: 47 additions & 3 deletions pystac/item_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,19 @@
return cls(items=items, extra_fields=extra_fields)

@classmethod
def from_file(cls: type[C], href: HREF, stac_io: pystac.StacIO | None = None) -> C:
def from_file(
cls: type[C],
href: HREF,
stac_io: pystac.StacIO | None = None,
is_geoparquet: bool | None = None,
) -> C:
"""Reads a :class:`ItemCollection` from a JSON file.

Arguments:
href : Path to the file.
stac_io : A :class:`~pystac.StacIO` instance to use for file I/O
is_geoparquet : Whether to read the file as stac-geoparquet. If
none, will be inferred from the file extension.
"""
if stac_io is None:
stac_io = pystac.StacIO.default()
Expand All @@ -205,26 +212,63 @@
if not is_absolute_href(href):
href = make_absolute_href(href)

d = stac_io.read_json(href)
if is_geoparquet or (is_geoparquet is None and href.endswith(".parquet")):
try:
import stacrs
except ImportError:
raise ImportError(

Check warning on line 219 in pystac/item_collection.py

View check run for this annotation

Codecov / codecov/patch

pystac/item_collection.py#L218-L219

Added lines #L218 - L219 were not covered by tests
"Could not import `stacrs`, which is required for "
"reading stac-geoparquet files. Enable pystac's `geoparquet` "
"feature to get stac-geoparquet support."
)

import asyncio

async def read() -> dict[str, Any]:
return await stacrs.read(href)

d = asyncio.run(read())
else:
d = stac_io.read_json(href)

return cls.from_dict(d, preserve_dict=False)

def save_object(
    self,
    dest_href: str,
    stac_io: pystac.StacIO | None = None,
    is_geoparquet: bool | None = None,
) -> None:
    """Saves this instance to the ``dest_href`` location.

    The item collection is written either as JSON (through ``stac_io``) or as
    stac-geoparquet (through the optional ``stacrs`` package).

    Args:
        dest_href : Location to which the file will be saved.
        stac_io: Optional :class:`~pystac.StacIO` instance to use. If not provided,
            will use the default instance. Only used for JSON output.
        is_geoparquet : Whether to write the file as stac-geoparquet. If
            ``None``, will be inferred from the file extension (``.parquet``).

    Raises:
        ImportError: If stac-geoparquet output is requested but ``stacrs`` is
            not installed.
    """
    if stac_io is None:
        stac_io = pystac.StacIO.default()

    # An explicit True/False always wins; only None falls back to the extension.
    if is_geoparquet is None:
        is_geoparquet = dest_href.endswith(".parquet")

    if not is_geoparquet:
        stac_io.save_json(dest_href, self.to_dict())
        return

    try:
        import stacrs
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "Could not import `stacrs`, which is required for "
            "writing stac-geoparquet files. Enable pystac's `geoparquet` "
            "feature to get stac-geoparquet support."
        ) from err

    import asyncio

    async def write() -> None:
        await stacrs.write(dest_href, self.to_dict())

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread: the simple path is safe.
        asyncio.run(write())
    else:
        # ``asyncio.run`` refuses to start inside a running loop (e.g. a Jupyter
        # notebook), so drive the coroutine on a fresh loop in a worker thread
        # and block until it finishes, keeping this method synchronous.
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            pool.submit(asyncio.run, write()).result()

Check warning on line 271 in pystac/item_collection.py

View check run for this annotation

Codecov / codecov/patch

pystac/item_collection.py#L271

Added line #L271 was not covered by tests

@staticmethod
def is_item_collection(d: dict[str, Any]) -> bool:
Expand Down
Binary file not shown.
55 changes: 55 additions & 0 deletions tests/test_item_collection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from copy import deepcopy
from os.path import relpath
from pathlib import Path
from typing import Any, cast

import pytest
Expand Down Expand Up @@ -206,3 +207,57 @@ def test_to_dict_does_not_read_root_link_of_items() -> None:
item_collection.to_dict()

assert mock_stac_io.mock.read_text.call_count == 1


def test_read_geoparquet() -> None:
    """Read the sample stac-geoparquet file, or fail cleanly without ``stacrs``.

    With ``stacrs`` installed, the format is inferred from the ``.parquet``
    extension and the collection must contain ten items. Without it, explicitly
    requesting stac-geoparquet must raise ``ImportError``.
    """
    # This parquet file was created using stac-geoparquet v0.6.0 using the
    # following snippet:
    #
    #   import json
    #
    #   import stac_geoparquet
    #
    #   with open("tests/data-files/item-collection/sample-item-collection.json") as f:
    #       data = json.load(f)
    #
    #   arrow = stac_geoparquet.arrow.parse_stac_items_to_arrow(data["features"])
    #   stac_geoparquet.arrow.to_parquet(
    #       arrow, "tests/data-files/item-collection/sample-item-collection.parquet"
    #   )

    try:
        import stacrs  # noqa
    except ImportError:
        has_stacrs = False
    else:
        has_stacrs = True

    path = TestCases.get_path(
        "data-files/item-collection/sample-item-collection.parquet"
    )

    if not has_stacrs:
        # Nothing is returned when the call raises, so no result is bound here.
        with pytest.raises(ImportError):
            ItemCollection.from_file(path, is_geoparquet=True)
        return

    item_collection = ItemCollection.from_file(path)
    assert len(item_collection) == 10


def test_write_geoparquet(tmp_path: Path, item_collection_dict: dict[str, Any]) -> None:
    """Write an item collection as stac-geoparquet, or fail cleanly without ``stacrs``.

    With ``stacrs`` installed, the format is inferred from the ``.parquet``
    extension. Without it, explicitly requesting stac-geoparquet must raise
    ``ImportError``.
    """
    try:
        import stacrs  # noqa
    except ImportError:
        stacrs_available = False
    else:
        stacrs_available = True

    collection = ItemCollection.from_dict(item_collection_dict)
    destination = str(tmp_path / "item-collection.parquet")

    if stacrs_available:
        collection.save_object(destination)
        return

    with pytest.raises(ImportError):
        collection.save_object(destination, is_geoparquet=True)
Loading
Loading