diff --git a/Cargo.lock b/Cargo.lock index a79e1c528..727d77c5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4920,6 +4920,7 @@ dependencies = [ "sedona-expr", "sedona-geometry", "sedona-schema", + "sedona-serde", "sedona-testing", "serde_json", "tokio", @@ -5195,6 +5196,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "sedona-serde" +version = "0.3.0" +dependencies = [ + "arrow-array", + "byteorder", + "datafusion-common", + "sedona-schema", + "wkb", + "wkt 0.14.0", +] + [[package]] name = "sedona-spatial-join" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index 9780fbe16..def576c19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ members = [ "rust/sedona-schema", "rust/sedona-spatial-join", "rust/sedona-testing", + "rust/sedona-serde", "rust/sedona", "sedona-cli", ] diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-functions/Cargo.toml index 9fb7f35f4..9cd7532ce 100644 --- a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-functions/Cargo.toml @@ -49,6 +49,7 @@ sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-geometry = { workspace = true } sedona-schema = { workspace = true } +sedona-serde = { path = "../sedona-serde" } wkb = { workspace = true } wkt = { workspace = true } serde_json = { workspace = true } diff --git a/rust/sedona-functions/src/fixtures/crs_point.sedona b/rust/sedona-functions/src/fixtures/crs_point.sedona new file mode 100644 index 000000000..413a310f0 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/crs_point.sedona @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// Point XY with CRS EPSG:4326 +19 + +// CRS EPSG:4326 +0 16 230 + +// number of coordinates +1 0 0 0 + +// coordinates (2 doubles per coordinate) +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 diff --git a/rust/sedona-functions/src/fixtures/empty_geometry_collection.sedona b/rust/sedona-functions/src/fixtures/empty_geometry_collection.sedona new file mode 100644 index 000000000..835a25ec3 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_geometry_collection.sedona @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+114 + +0 0 0 + +0 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/empty_linestring.sedona b/rust/sedona-functions/src/fixtures/empty_linestring.sedona new file mode 100644 index 000000000..820e8ba96 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_linestring.sedona @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +34 + +0 0 0 + +0 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/empty_multilinestring.sedona b/rust/sedona-functions/src/fixtures/empty_multilinestring.sedona new file mode 100644 index 000000000..af8bd94e7 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_multilinestring.sedona @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +82 + +0 0 0 + +0 0 0 0 0 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/empty_multipolygon.sedona b/rust/sedona-functions/src/fixtures/empty_multipolygon.sedona new file mode 100644 index 000000000..a01f58f3e --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_multipolygon.sedona @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +98 + +0 0 0 + +0 0 0 0 0 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/empty_point.sedona b/rust/sedona-functions/src/fixtures/empty_point.sedona new file mode 100644 index 000000000..61641a333 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_point.sedona @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +18 + +0 0 0 + +0 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/empty_polygon.sedona b/rust/sedona-functions/src/fixtures/empty_polygon.sedona new file mode 100644 index 000000000..613ce9e6b --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_polygon.sedona @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+50 + +0 0 0 + +0 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/geometrycollection.sedona b/rust/sedona-functions/src/fixtures/geometrycollection.sedona new file mode 100644 index 000000000..1d2a8103c --- /dev/null +++ b/rust/sedona-functions/src/fixtures/geometrycollection.sedona @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata GeometryCollection XY NO SRID +114 + +// missing srid information +0 0 0 + +// number of geometries +3 0 0 0 + +// point geometry +18 + +0 0 0 + +1 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +// linestring geometry +34 + +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 + +// polygon geometry +50 + +0 0 0 + +4 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +1 0 0 0 +4 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/geometrycollectioncomplex.sedona b/rust/sedona-functions/src/fixtures/geometrycollectioncomplex.sedona new file mode 100644 index 000000000..c317ffb3d --- /dev/null +++ b/rust/sedona-functions/src/fixtures/geometrycollectioncomplex.sedona @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata GeometryCollection XY NO SRID +114 + +// missing srid information +0 0 0 + +// number of geometries +4 0 0 0 + +// point geometry +18 + +0 0 0 + +1 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +// linestring geometry +34 + +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 + +// polygon geometry +50 + +0 0 0 + +4 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +1 0 0 0 +4 0 0 0 + +// multipoint geometry +66 + +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 240 63 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 8 64 0 0 0 0 0 0 16 64 diff --git a/rust/sedona-functions/src/fixtures/linestring.sedona b/rust/sedona-functions/src/fixtures/linestring.sedona new file mode 100644 index 000000000..324b8d075 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/linestring.sedona @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata LINESTRING XY NO SRID +34 + +// missing srid information +0 0 0 + +// number of coordinates +3 0 0 0 + +// coordinates (2 doubles per coordinate) +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 diff --git a/rust/sedona-functions/src/fixtures/multilinestring.sedona b/rust/sedona-functions/src/fixtures/multilinestring.sedona new file mode 100644 index 000000000..f42d5c579 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multilinestring.sedona @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata MultiLinestring XY NO SRID +82 + +// missing srid information +0 0 0 + +// number of points +4 0 0 0 + +// coordinates linestring 1 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 + +// coordinates linestring 2 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 20 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 28 64 + +// number of linestrings +2 0 0 0 + +// linestring 1 has 2 points +2 0 0 0 + +// linestring 2 has 2 points +2 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/multipoint.sedona b/rust/sedona-functions/src/fixtures/multipoint.sedona new file mode 100644 index 000000000..a5570297a --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multipoint.sedona @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata Multipoint XY NO SRID +66 + +// missing srid information +0 0 0 + +// number of points +3 0 0 0 + +// coordinates (2 doubles per point) +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 20 64 diff --git a/rust/sedona-functions/src/fixtures/multipoint_empty.sedona b/rust/sedona-functions/src/fixtures/multipoint_empty.sedona new file mode 100644 index 000000000..df5fea65e --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multipoint_empty.sedona @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +66 + +0 0 0 + +0 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/multipolygon.sedona b/rust/sedona-functions/src/fixtures/multipolygon.sedona new file mode 100644 index 000000000..49891bf18 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multipolygon.sedona @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// metadata Multipolygon XY NO SRID +98 + +// missing srid information +0 0 0 + +// number of points +30 0 0 0 + +// polygon 1 coordinates +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 36 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 36 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 + +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 + +// polygon 2 coordinates +0 0 0 0 0 0 40 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 52 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 52 64 0 0 0 0 0 0 34 64 +0 0 0 0 0 0 40 64 0 0 0 0 0 0 34 64 +0 0 0 0 0 0 40 64 0 0 0 0 0 0 240 63 + +0 0 0 0 0 0 42 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 46 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 46 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 42 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 42 64 0 0 0 0 0 0 0 64 + +0 0 0 0 0 0 49 64 0 0 0 0 0 0 20 64 +0 0 0 0 0 0 51 64 0 0 0 0 0 0 20 64 +0 0 0 0 0 0 51 64 0 0 0 0 0 0 28 64 +0 0 0 0 0 0 49 64 0 0 0 0 0 0 28 64 +0 0 0 0 0 0 49 64 0 0 0 0 0 0 20 64 + +// number of polygons +2 0 0 0 + +// number of polygon 1 components +3 0 0 0 + +// number of points in polygon 1 
components +5 0 0 0 +5 0 0 0 +5 0 0 0 + +// number of polygon 2 components +3 0 0 0 + +// number of points in polygon 2 components +5 0 0 0 +5 0 0 0 +5 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/nested_geometry_collection.sedona b/rust/sedona-functions/src/fixtures/nested_geometry_collection.sedona new file mode 100644 index 000000000..d35ef0fac --- /dev/null +++ b/rust/sedona-functions/src/fixtures/nested_geometry_collection.sedona @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata GeometryCollection XY NO SRID +114 + +0 0 0 + +2 0 0 0 + +// point geometry +18 + +0 0 0 + +1 0 0 0 + +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +// geometry collection geometry +114 0 0 0 + +2 0 0 0 + +// linestring geometry +34 + +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +// polygon geometry +50 + +0 0 0 + +5 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +1 0 0 0 +5 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/point.sedona b/rust/sedona-functions/src/fixtures/point.sedona new file mode 100644 index 000000000..bb127e1bc --- /dev/null +++ b/rust/sedona-functions/src/fixtures/point.sedona @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata POINT XY NO SRID +18 + +// srid information +0 0 0 + +// number of coordinates +1 0 0 0 + +// coordinates (2 doubles per coordinate) +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 diff --git a/rust/sedona-functions/src/fixtures/point_float_coords.sedona b/rust/sedona-functions/src/fixtures/point_float_coords.sedona new file mode 100644 index 000000000..4ba2f3f85 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/point_float_coords.sedona @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +50 + +0 0 0 + +11 0 0 0 + +59 111 47 211 252 176 40 64 +243 214 83 60 230 214 70 64 + +164 49 237 31 240 116 55 64 +235 19 96 200 247 248 80 64 + +131 251 161 159 176 72 65 64 +228 105 70 91 254 100 76 64 + +243 214 83 60 230 214 70 64 +131 251 161 159 176 72 65 64 + +81 184 200 34 101 224 61 64 +157 183 151 105 126 88 54 64 + +59 111 47 211 252 176 40 64 +243 214 83 60 230 214 70 64 + +149 70 55 221 154 31 57 64 +59 111 240 116 107 254 70 64 + +118 222 224 233 214 252 62 64 +75 163 155 110 205 15 73 64 + +210 152 246 15 120 186 65 64 +210 152 246 15 120 186 70 64 + +31 197 40 164 12 60 62 64 +59 111 240 116 107 126 68 64 + +149 70 55 221 154 31 57 64 +59 111 240 116 107 254 70 64 + +2 0 0 0 +6 0 0 0 +5 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/polygon.sedona b/rust/sedona-functions/src/fixtures/polygon.sedona new file mode 100644 index 000000000..c0c36bcc4 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/polygon.sedona @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// metadata Polygon XY NO SRID +50 + +// no srid +0 0 0 + +// number of points +15 0 0 0 + +// polygon 1 ring 1 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 36 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 36 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +// hole 1 ring 1 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 + +// hole 2 ring 1 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 + +// number of rings +3 0 0 0 + +// number of points internal 1 +5 0 0 0 + +// number of points hole 1 +5 0 0 0 + +// number of points hole 2 +5 0 0 0 diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 44c8ad027..cce92a6d4 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -38,6 +38,7 @@ mod st_dwithin; pub mod st_envelope; pub mod st_envelope_agg; pub mod st_flipcoordinates; +mod st_from_sedona_spark; mod st_geometryn; mod st_geometrytype; mod st_geomfromwkb; @@ -61,6 +62,7 @@ mod st_reverse; mod st_setsrid; mod st_srid; mod st_start_point; +mod st_to_sedona_spark; mod st_transform; mod st_translate; pub mod st_union_agg; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index ff4395787..2a94da236 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -120,6 +120,8 @@ pub fn default_function_set() -> FunctionSet { crate::st_xyzm::st_y_udf, crate::st_xyzm::st_z_udf, crate::st_zmflag::st_zmflag_udf, + crate::st_from_sedona_spark::st_geomfromsedona_udf, + crate::st_to_sedona_spark::st_geomtosedona_udf, ); register_aggregate_udfs!( diff --git a/rust/sedona-functions/src/st_from_sedona_spark.rs 
b/rust/sedona-functions/src/st_from_sedona_spark.rs new file mode 100644 index 000000000..bc5d7e490 --- /dev/null +++ b/rust/sedona-functions/src/st_from_sedona_spark.rs @@ -0,0 +1,254 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::executor::WkbExecutor; +use arrow_array::builder::BinaryBuilder; +use arrow_schema::DataType; +use datafusion_common::cast::as_binary_array; +use datafusion_common::ScalarValue; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion_expr::{ColumnarValue, Documentation, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; +use sedona_schema::crs::deserialize_crs; +use sedona_schema::datatypes::{Edges, SedonaType, WKB_GEOMETRY}; +use sedona_schema::matchers::ArgMatcher; +use sedona_serde::deserialize::deserialize; +use std::sync::Arc; + +fn to_crs_str(scalar_arg: &ScalarValue) -> Option { + if let Ok(ScalarValue::Utf8(Some(crs))) = scalar_arg.cast_to(&DataType::Utf8) { + return Some(crs); + } + + None +} + +#[derive(Debug)] +struct STGeomFromSedonaSpark { + out_type: SedonaType, +} + +pub fn st_geomfromsedona_udf() -> SedonaScalarUDF { + let kernel = 
Arc::new(STGeomFromSedonaSpark { + out_type: WKB_GEOMETRY, + }); + + SedonaScalarUDF::new( + "st_geomfromsedonaspark", + vec![kernel], + Volatility::Immutable, + Some(doc()), + ) +} + +impl SedonaScalarKernel for STGeomFromSedonaSpark { + fn return_type(&self, args: &[SedonaType]) -> datafusion_common::Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_binary(), ArgMatcher::is_string()], + self.out_type.clone(), + ); + + matcher.match_args(args) + } + + fn return_type_from_args_and_scalars( + &self, + args: &[SedonaType], + _scalar_args: &[Option<&ScalarValue>], + ) -> datafusion_common::Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_binary(), ArgMatcher::is_string()], + self.out_type.clone(), + ); + + if !matcher.matches(args) { + return Ok(None); + } + + let crs_scalar = _scalar_args.get(1).unwrap(); + + let crs_str_opt = if let Some(scalar_crs) = crs_scalar { + to_crs_str(scalar_crs) + } else { + None + }; + + match crs_str_opt { + Some(to_crs) => Ok(Some(SedonaType::Wkb( + Edges::Planar, + deserialize_crs(&to_crs)?, + ))), + _ => Ok(Some(SedonaType::Wkb(Edges::Planar, None))), + } + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> datafusion_common::Result { + let executor = WkbExecutor::new(arg_types, args); + let arg_array = args[0] + .cast_to(&DataType::Binary, None)? 
+ .to_array(executor.num_iterations())?; + + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + for sedona_bytes in as_binary_array(&arg_array)?.into_iter().flatten() { + deserialize(&mut builder, sedona_bytes)?; + builder.append_value(vec![]); + } + + let new_array = builder.finish(); + executor.finish(Arc::new(new_array)) + } +} + +fn doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Internal only, it's function used in the vectorized UDFs to translate Sedona Spark binary format to WKB format.", + "ST_GeomFromSedonaSpark (geom: binary, crs: string)", + ) + .with_argument("geom", "sedona spark geometry binary") + .with_argument("crs", "crs: coordinate reference system") + .with_sql_example("SELECT ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')") + .build() +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + use sedona_testing::testers::ScalarUdfTester; + + fn get_tester() -> ScalarUdfTester { + ScalarUdfTester::new( + st_geomfromsedona_udf().into(), + vec![ + SedonaType::Arrow(DataType::Binary), + SedonaType::Arrow(DataType::Utf8), + ], + ) + } + + fn fixture_to_bytes(wkb: &str) -> Vec { + wkb.split("\n") + .filter(|line| !line.starts_with("//") && !line.is_empty()) + .flat_map(|s| s.split_whitespace()) + .map(|num| num.parse::().expect("invalid byte")) + .collect::>() + } + + const POINT_WKT: &str = "POINT (1 1)"; + const LINESTRING_WKT: &str = "LINESTRING (0 0, 1 1, 2 2)"; + const MULTILINESTRING_WKT: &str = "MULTILINESTRING ((1 1, 2 2), (4 5, 6 7))"; + const MULTIPOINT_WKT: &str = "MULTIPOINT ((1 1), (2 2), (4 5))"; + const POLYGON_WKT: &str = "POLYGON ( + (1 1, 10 1, 10 10, 1 10, 1 1), + (2 2, 4 2, 4 4, 2 4, 2 2), + (6 6, 8 6, 8 8, 6 8, 6 6) + )"; + const MULTIPOLYGON_WKT: &str = "MULTIPOLYGON ( + ( + (1 1, 10 1, 10 10, 1 10, 1 1), + (2 2, 4 2, 4 4, 2 4, 2 2), + (6 6, 8 6, 8 8, 6 8, 6 6) 
// Round-trip check for ST_GeomFromSedonaSpark: each case pairs the WKT that is
// expected back with a text fixture holding the Sedona-serialized bytes.
#[rstest]
fn test_geometries_deserialization(
    #[values(
        (POINT_WKT, include_str!("fixtures/point.sedona")),
        (LINESTRING_WKT, include_str!("fixtures/linestring.sedona")),
        (MULTILINESTRING_WKT, include_str!("fixtures/multilinestring.sedona")),
        (MULTIPOINT_WKT, include_str!("fixtures/multipoint.sedona")),
        (POLYGON_WKT, include_str!("fixtures/polygon.sedona")),
        (MULTIPOLYGON_WKT, include_str!("fixtures/multipolygon.sedona")),
        (GEOMETRYCOLLECTION_WKT, include_str!("fixtures/geometrycollection.sedona")),
        (COMPLEX_GEOMETRYCOLLECTION_WKT, include_str!("fixtures/geometrycollectioncomplex.sedona")),
        (NESTED_GEOMETRYCOLLECTION_WKT, include_str!("fixtures/nested_geometry_collection.sedona")),
        ("POINT EMPTY", include_str!("fixtures/empty_point.sedona")),
        ("LINESTRING EMPTY", include_str!("fixtures/empty_linestring.sedona")),
        ("POLYGON EMPTY", include_str!("fixtures/empty_polygon.sedona")),
        ("MULTIPOINT EMPTY", include_str!("fixtures/multipoint_empty.sedona")),
        ("MULTIPOLYGON EMPTY", include_str!("fixtures/empty_multipolygon.sedona")),
        ("MULTILINESTRING EMPTY", include_str!("fixtures/empty_multilinestring.sedona")),
        ("GEOMETRYCOLLECTION EMPTY", include_str!("fixtures/empty_geometry_collection.sedona")),
        // NOTE(review): this polygon case reads a fixture named
        // `point_float_coords.sedona` — confirm the file name is intentional.
        (FLOATING_POLYGON_WKT, include_str!("fixtures/point_float_coords.sedona"))
    )]
    value: (&str, &str),
) {
    let (expected_wkt, input_bytes) = value;

    // Turn the textual fixture into the binary payload passed to the UDF.
    let binary_geometry = fixture_to_bytes(input_bytes);
    let tester = get_tester();

    // The second argument is the CRS string; the assertion below only compares
    // the geometry against the expected WKT.
    let result = tester
        .invoke_scalar_scalar(binary_geometry, ScalarValue::Utf8(Some("4326".to_string())))
        .unwrap();

    tester.assert_scalar_result_equals(result, expected_wkt);
}
+ +use crate::executor::WkbExecutor; +use arrow_array::builder::BinaryBuilder; +use arrow_schema::DataType; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion_expr::{ColumnarValue, Documentation, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; +use sedona_serde::serialize::serialize; +use std::sync::Arc; + +#[derive(Debug)] +struct STGeomToSedonaSpark {} + +impl SedonaScalarKernel for STGeomToSedonaSpark { + fn return_type(&self, args: &[SedonaType]) -> datafusion_common::Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry()], + SedonaType::Arrow(DataType::Binary), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> datafusion_common::Result { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + let crs_value = match &arg_types[0] { + SedonaType::Wkb(_, crs) => { + match crs { + Some(_crs) => { + let crs_id = _crs.srid()?; + + match crs_id { + Some(srid) => Ok(Some(srid)), + None => Err(datafusion_common::DataFusionError::Internal( + "ST_GeomToSedonaSpark: Unsupported CRS without SRID".to_string(), + )), + } + } + None => Ok(None), + } + // + } + _ => Err(datafusion_common::DataFusionError::Internal( + "ST_GeomToSedonaSpark: Unsupported geometry type".to_string(), + )), + }?; + + executor.execute_wkb_void(|maybe_item| { + match maybe_item { + Some(item) => { + serialize(&item, &mut builder, crs_value)?; + builder.append_value([]); + } + None => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +pub fn st_geomtosedona_udf() -> SedonaScalarUDF { + let kernel = 
Arc::new(STGeomToSedonaSpark {}); + + SedonaScalarUDF::new( + "st_geomtosedonaspark", + vec![kernel], + Volatility::Immutable, + Some(doc()), + ) +} + +fn doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Internal only, it's function used in the vectorized UDFs to translate WKB to Sedona Spark binary format", + "ST_GeomToSedonaSpark (geom: Geometry, crs: string)", + ) + .with_argument("geom", "wkb geometry") + .with_sql_example("SELECT ST_GeomToSedonaSpark(geom)") + .build() +} + +#[cfg(test)] +mod tests { + use crate::st_to_sedona_spark::st_geomtosedona_udf; + use datafusion_common::ScalarValue; + use rstest::rstest; + use sedona_schema::crs::deserialize_crs; + use sedona_schema::datatypes::{Edges, SedonaType}; + use sedona_testing::create::create_scalar; + use sedona_testing::testers::ScalarUdfTester; + + const POINT_WKT: &str = "POINT (1 1)"; + const LINESTRING_WKT: &str = "LINESTRING (0 0, 1 1, 2 2)"; + const MULTILINESTRING_WKT: &str = "MULTILINESTRING ((1 1, 2 2), (4 5, 6 7))"; + const MULTIPOINT_WKT: &str = "MULTIPOINT ((1 1), (2 2), (4 5))"; + const POLYGON_WKT: &str = "POLYGON ( + (1 1, 10 1, 10 10, 1 10, 1 1), + (2 2, 4 2, 4 4, 2 4, 2 2), + (6 6, 8 6, 8 8, 6 8, 6 6) + )"; + const MULTIPOLYGON_WKT: &str = "MULTIPOLYGON ( + ( + (1 1, 10 1, 10 10, 1 10, 1 1), + (2 2, 4 2, 4 4, 2 4, 2 2), + (6 6, 8 6, 8 8, 6 8, 6 6) + ), + ( + (12 1, 20 1, 20 9, 12 9, 12 1), + (13 2, 15 2, 15 4, 13 4, 13 2), + (17 5, 19 5, 19 7, 17 7, 17 5) + ) + )"; + const GEOMETRYCOLLECTION_WKT: &str = "GEOMETRYCOLLECTION ( + POINT (4 6), + LINESTRING (4 6,7 10), + POLYGON((4 6,7 10,4 10,4 6)) + )"; + + const COMPLEX_GEOMETRYCOLLECTION_WKT: &str = "GEOMETRYCOLLECTION( + POINT(4 6), + LINESTRING(4 6,7 10), + POLYGON((4 6,7 10,4 10,4 6)), + MULTIPOINT((1 2),(3 4)) + )"; + const NESTED_GEOMETRYCOLLECTION_WKT: &str = "GEOMETRYCOLLECTION ( + POINT (1 1), + GEOMETRYCOLLECTION ( + LINESTRING (0 0, 1 1), + POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0)) + ) + )"; + + const 
/// Decodes a `.sedona` text fixture into the byte sequence it spells out.
///
/// Values are whitespace-separated decimal numbers in `0..=255`. Empty lines
/// are skipped, as is every line that starts with `//` once leading
/// whitespace is removed.
///
/// # Panics
///
/// Panics with the offending token in the message when a value does not
/// parse as `u8`.
fn fixture_to_bytes(wkb: &str) -> Vec<u8> {
    let mut bytes = Vec::new();

    for line in wkb.lines() {
        // Trim first so an indented `// ...` line is still a comment.
        let line = line.trim();
        if line.starts_with("//") {
            continue;
        }

        for token in line.split_whitespace() {
            let byte = token
                .parse::<u8>()
                .unwrap_or_else(|err| panic!("invalid byte {token:?} in fixture: {err}"));
            bytes.push(byte);
        }
    }

    bytes
}
// Serializes a point whose type carries EPSG:4326 and compares the bytes with
// the `crs_point.sedona` fixture.
#[test]
fn test_serialization_with_crs() {
    // Parse the CRS up front so the same value can be attached both to the
    // tester's argument type and to the scalar created below.
    let crs = deserialize_crs("EPSG:4326").unwrap(); // to ensure Crs can be deserialized to provide

    let tester = ScalarUdfTester::new(
        st_geomtosedona_udf().into(),
        vec![SedonaType::Wkb(Edges::Planar, crs.clone())],
    );

    let geometry = create_scalar(Some(POINT_WKT), &SedonaType::Wkb(Edges::Planar, crs));

    let result = tester.invoke_scalar(geometry).unwrap();

    // Expected output is kept as a text fixture and decoded to raw bytes here.
    let expected_fixture = include_str!("fixtures/crs_point.sedona");
    let binary_geometry = fixture_to_bytes(expected_fixture);

    assert_eq!(result, ScalarValue::Binary(Some(binary_geometry)));
}
# Manifest for the `sedona-serde` crate; package metadata is inherited from
# the workspace.
[package]
name = "sedona-serde"
version.workspace = true
homepage.workspace = true
repository.workspace = true
description.workspace = true
readme.workspace = true
edition.workspace = true
rust-version.workspace = true

[lints.clippy]
result_large_err = "allow"

[dev-dependencies]

[dependencies]
arrow-array = { workspace = true }
sedona-schema = { workspace = true }
datafusion-common = { workspace = true }
wkt = { workspace = true }
# NOTE(review): the two entries below are pinned locally rather than inherited
# from the workspace, while sedona-functions declares `wkb = { workspace = true }`
# — confirm whether they should use the workspace versions too.
byteorder = "1.5.0"
wkb = "0.9.2"
/// Entry point for decoding one Sedona-serialized geometry.
///
/// Wraps `bytes` in a cursor and hands it to `deserialize_geometry`, which
/// writes the converted geometry into `builder`.
///
/// # Errors
///
/// Returns an internal error when `bytes` holds fewer than 8 bytes, and
/// propagates any error from `deserialize_geometry`.
pub fn deserialize(builder: &mut BinaryBuilder, bytes: &[u8]) -> datafusion_common::Result<()> {
    use std::io::Cursor;

    // Reject inputs shorter than 8 bytes before any read is attempted.
    if bytes.len() < 8 {
        return Err(DataFusionError::Internal(
            "Sedona bytes are too short".to_string(),
        ));
    }

    let mut reader = Cursor::new(bytes);

    // The raw slice is passed alongside the cursor so that geometry types with
    // a separate metadata section can open a second cursor over the same data.
    deserialize_geometry::(builder, &mut reader, bytes)
}
meta_data_reader.set_position(cursor.position() + (metadata_start_position) as u64); + + deserialize_polygon::(builder, cursor, &mut meta_data_reader, dimension)?; + cursor.set_position(meta_data_reader.position()); + } + 4 => { + deserialize_multipoint::(builder, cursor, dimension)?; + } + 5 => { + let mut meta_data_reader = Cursor::new(bytes); + deserialize_multilinestring::( + builder, + cursor, + &mut meta_data_reader, + dimension, + )?; + cursor.set_position(meta_data_reader.position()); + } + 6 => { + let mut meta_data_reader = Cursor::new(bytes); + deserialize_multipolygon::(builder, cursor, &mut meta_data_reader, dimension)?; + cursor.set_position(meta_data_reader.position()); + } + 7 => { + let number_of_geometries = cursor.read_u32::()?; + write_wkb_byte_order_marker(builder)?; + builder.write_u32::(get_byte_type_for_geometry_collection(dimension))?; + + builder.write_u32::(number_of_geometries)?; + + for _i in 0..number_of_geometries { + deserialize_geometry::(builder, cursor, bytes)?; + } + } + _ => { + return Err(DataFusionError::Execution(format!( + "Unsupported geometry type: {}", + wkb_type + ))) + } + } + + Ok(()) +} + +fn get_byte_type_for_geometry_collection(dimension: Dimension) -> u32 { + match dimension { + Dimension::XY => 7u32, + Dimension::XYZ => 1007u32, + Dimension::XYM => 2007u32, + Dimension::XYZM => 3007u32, + } +} + +fn get_dimension(b: u8) -> Dimension { + match b { + 1 => Dimension::XY, + 2 => Dimension::XYZ, + 3 => Dimension::XYM, + 4 => Dimension::XYZM, + _ => Dimension::XY, + } +} diff --git a/rust/sedona-serde/src/lib.rs b/rust/sedona-serde/src/lib.rs new file mode 100644 index 000000000..b8b3fb8d3 --- /dev/null +++ b/rust/sedona-serde/src/lib.rs @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod deserialize; +mod linestring; +mod point; +mod polygon; +pub mod serialize; +mod wkb; diff --git a/rust/sedona-serde/src/linestring.rs b/rust/sedona-serde/src/linestring.rs new file mode 100644 index 000000000..14f630a0a --- /dev/null +++ b/rust/sedona-serde/src/linestring.rs @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
/// Converts one Sedona-serialized linestring into a WKB linestring.
///
/// Reads the point count from `cursor`, writes the WKB header (byte-order
/// marker, linestring type code for `dimension`, point count) to `builder`,
/// then copies the coordinate values across.
///
/// # Errors
///
/// Propagates I/O errors from reading `cursor` or writing to `builder`.
pub fn deserialize_linestring(
    builder: &mut BinaryBuilder,
    cursor: &mut Cursor<&[u8]>,
    dimension: Dimension,
) -> datafusion_common::Result<()> {
    let number_of_points = cursor.read_u32::()?;
    let byte_type = get_linestring_marker(dimension);

    write_wkb_byte_order_marker(builder)?;

    builder.write_u32::(byte_type)?;

    builder.write_u32::(number_of_points)?;

    // Each point is copied as two 8-byte values, byte for byte, so no
    // endianness conversion happens on the coordinates themselves.
    // NOTE(review): the `* 2` is done in u32 and the count hard-codes two
    // ordinates per point regardless of `dimension` — confirm callers only
    // pass XY data and trusted counts.
    let mut buf = [0u8; 8];
    for _ in 0..number_of_points * 2 {
        cursor.read_exact(&mut buf)?;
        _ = builder.write(&buf)?;
    }

    Ok(())
}
/// Re-encodes the body of a WKB multilinestring in the Sedona layout.
///
/// `cursor` must be positioned at the linestring count. The output written
/// to `builder` is: total point count, then every coordinate of every
/// linestring, then the metadata section (linestring count followed by the
/// point count of each linestring).
///
/// # Errors
///
/// Returns an internal error when a member is not a linestring (type `2`) or
/// its byte-order marker is not `1`; propagates I/O errors.
pub fn serialize_multilinestring(
    builder: &mut BinaryBuilder,
    cursor: &mut Cursor<&[u8]>,
) -> datafusion_common::Result<()> {
    let number_of_linestrings = cursor.read_u32::()?;

    // Coordinates and metadata are staged separately because the total point
    // count, which must be written first, is only known after the loop.
    let metadata_vector = Vec::new();
    let mut metadata_cursor = Cursor::new(metadata_vector);

    let coordinates_vector = Vec::new();
    let mut coordinates_cursor = Cursor::new(coordinates_vector);

    let mut total_number_of_points = 0;

    metadata_cursor.write_u32::(number_of_linestrings)?;

    for _ in 0..number_of_linestrings {
        // Every member carries its own WKB header: byte order, then type.
        let byte_order = cursor.read_u8()?;
        let _geometry_type = cursor.read_u32::()?;
        if _geometry_type != 2 {
            return Err(datafusion_common::DataFusionError::Internal(
                "Invalid geometry type in WKB".to_string(),
            ));
        }

        if byte_order != 1 {
            return Err(datafusion_common::DataFusionError::Internal(
                "Invalid byte order in WKB".to_string(),
            ));
        }

        let _number_of_points = cursor.read_u32::()?;
        // NOTE(review): plain u32 addition/multiplication — confirm the input
        // is trusted enough that overflow cannot occur.
        total_number_of_points += _number_of_points;
        metadata_cursor.write_u32::(_number_of_points)?;

        // Two 8-byte values per point, copied verbatim.
        for _ in 0.._number_of_points * 2 {
            let mut buf = [0u8; 8];
            cursor.read_exact(&mut buf)?;
            _ = coordinates_cursor.write(&buf)?;
        }
    }

    builder.write_u32::(total_number_of_points)?;

    _ = builder.write(coordinates_cursor.get_ref())?;
    _ = builder.write(metadata_cursor.get_ref())?;
    Ok(())
}
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::wkb::write_wkb_byte_order_marker; +use arrow_array::builder::BinaryBuilder; +use byteorder::{ByteOrder, ReadBytesExt, WriteBytesExt}; +use datafusion_common::error::Result; +use std::io::{Cursor, Read, Write}; +use wkt::types::Dimension; + +const NAN_2X: [u8; 16] = [0, 0, 0, 0, 0, 0, 248, 127, 0, 0, 0, 0, 0, 0, 248, 127]; + +fn get_byte_type_for_point(dimension: Dimension) -> u32 { + match dimension { + Dimension::XY => 1u32, + Dimension::XYZ => 1001u32, + Dimension::XYM => 2001u32, + Dimension::XYZM => 3001u32, + } +} + +pub fn deserialize_empty_point( + builder: &mut BinaryBuilder, + dimension: Dimension, +) -> Result<()> { + write_wkb_byte_order_marker(builder)?; + builder.write_u32::(get_byte_type_for_point(dimension))?; + + builder.write_f64::(f64::NAN)?; // X + builder.write_f64::(f64::NAN)?; // Y + + Ok(()) +} + +pub fn deserialize_point( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + dimension: Dimension, +) -> Result<()> { + write_wkb_byte_order_marker(builder)?; + builder.write_u32::(get_byte_type_for_point(dimension))?; + + let mut buf = [0u8; 8]; + cursor.read_exact(&mut buf)?; + + _ = builder.write(&buf)?; + + 
/// Writes the body of a WKB point in the Sedona layout.
///
/// Reads 16 bytes from `cursor`. When they equal `NAN_2X` the point is
/// treated as empty and only a coordinate count of `0` is written; otherwise
/// a count of `1` is written followed by the 16 bytes unchanged.
///
/// # Errors
///
/// Propagates I/O errors from `cursor` and `builder`.
pub fn serialize_point(
    builder: &mut BinaryBuilder,
    cursor: &mut Cursor<&[u8]>,
) -> Result<()> {
    let mut buf = [0u8; 16];
    cursor.read_exact(&mut buf)?;
    // NOTE(review): emptiness is detected by exact byte equality with
    // `NAN_2X`, so a NaN encoded with any other bit pattern would be written
    // as a regular coordinate — confirm producers always emit this pattern.
    if buf == NAN_2X {
        builder.write_u32::(0)?; // no coordinates

        return Ok(());
    }

    builder.write_u32::(1)?; // numCoordinates
    builder.write_all(&buf)?;

    Ok(())
}
/// Converts one Sedona-serialized polygon into a WKB polygon.
///
/// Two cursors are used: `metadata_reader` supplies the ring count and the
/// point count of each ring, while `cursor` supplies the coordinate values.
/// Both are advanced as data is consumed, so the caller can keep using them
/// for a following polygon.
///
/// # Errors
///
/// Propagates I/O errors from either cursor or from `builder`.
pub fn deserialize_polygon(
    builder: &mut BinaryBuilder,
    cursor: &mut Cursor<&[u8]>,
    metadata_reader: &mut Cursor<&[u8]>,
    dimension: Dimension,
) -> datafusion_common::Result<()> {
    let byte_type = get_polygon_marker(dimension);
    let number_of_rings = metadata_reader.read_u32::()?;

    // WKB polygon header: byte order, type code, ring count.
    write_wkb_byte_order_marker(builder)?;
    builder.write_u32::(byte_type)?;
    builder.write_u32::(number_of_rings)?;

    for _ in 0..number_of_rings {
        let ring_number_of_points = metadata_reader.read_u32::()?;
        builder.write_u32::(ring_number_of_points)?;

        // Two 8-byte values per point, copied verbatim.
        // NOTE(review): `* 2` is u32 arithmetic on a value read from input —
        // confirm overflow is not a concern for the expected data.
        let mut buf = [0u8; 8];
        for _ in 0..ring_number_of_points * 2 {
            cursor.read_exact(&mut buf)?;
            _ = builder.write(&buf)?;
        }
    }

    Ok(())
}
/// Writes the WKB encoding of an empty polygon: byte-order marker, polygon
/// type code for `dimension`, and a ring count of zero.
///
/// # Errors
///
/// Propagates I/O errors from writing to `builder`.
pub(crate) fn deserialize_empty_polygon(
    builder: &mut BinaryBuilder,
    dimension: Dimension,
) -> datafusion_common::Result<()> {
    // NOTE(review): this table repeats the values returned by
    // `get_polygon_marker` in this module — consider calling it instead.
    let byte_type = match dimension {
        Dimension::XY => 3u32,
        Dimension::XYZ => 1003u32,
        Dimension::XYM => 2003u32,
        Dimension::XYZM => 3003u32,
    };

    write_wkb_byte_order_marker(builder)?;
    builder.write_u32::(byte_type)?;
    builder.write_u32::(0u32)?; // 0 rings

    Ok(())
}
/// Re-encodes the body of a WKB multipolygon in the Sedona layout.
///
/// `cursor` must be positioned at the polygon count. The output written to
/// `builder` is: total point count, then all coordinates, then the metadata
/// section (polygon count; for each polygon its ring count followed by the
/// point count of each ring).
///
/// # Errors
///
/// Returns an internal error when a member's byte-order marker is not `1` or
/// its type is not polygon (`3`); propagates I/O errors.
pub fn serialize_multipolygon(
    builder: &mut BinaryBuilder,
    cursor: &mut Cursor<&[u8]>,
) -> datafusion_common::Result<()> {
    let number_of_polygons = cursor.read_u32::()?;

    // Coordinates and metadata are staged in memory because the total point
    // count has to be written before either of them.
    let mut total_points = 0u32;
    let coordinates_vector = Vec::new();
    let mut coordinates_cursor = Cursor::new(coordinates_vector);
    let metadata_vector = Vec::new();
    let mut metadata_cursor = Cursor::new(metadata_vector);

    metadata_cursor.write_u32::(number_of_polygons)?;

    for _ in 0..number_of_polygons {
        // Each member polygon starts with its own WKB header.
        let endianness_marker = cursor.read_u8()?;
        let _geometry_type = cursor.read_u32::()?;
        if endianness_marker != 1 {
            return Err(datafusion_common::DataFusionError::Internal(
                "Invalid byte order in WKB".to_string(),
            ));
        }

        if _geometry_type != 3 {
            return Err(datafusion_common::DataFusionError::Internal(
                "Invalid geometry type in WKB".to_string(),
            ));
        }

        let number_of_rings = cursor.read_u32::()?;
        metadata_cursor.write_u32::(number_of_rings)?;

        for _ in 0..number_of_rings {
            let number_of_points_in_ring = cursor.read_u32::()?;
            metadata_cursor.write_u32::(number_of_points_in_ring)?;

            total_points += number_of_points_in_ring;

            // A whole ring (two 8-byte values per point) is copied at once.
            // NOTE(review): the buffer size is computed in u32 from a count
            // read from input — confirm the WKB is validated upstream.
            let mut buf = vec![0u8; (number_of_points_in_ring * 8 * 2) as usize];
            cursor.read_exact(&mut buf)?;
            _ = coordinates_cursor.write(&buf)?;
        }
    }

    builder.write_u32::(total_points)?;

    _ = builder.write(coordinates_cursor.get_ref())?;
    _ = builder.write(metadata_cursor.get_ref())?;

    Ok(())
}
license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::linestring::{serialize_linestring, serialize_multilinestring};
+use crate::point::{serialize_multipoint, serialize_point};
+use crate::polygon::{serialize_multipolygon, serialize_polygon};
+use arrow_array::builder::BinaryBuilder;
+use byteorder::{ByteOrder, LittleEndian, ReadBytesExt, WriteBytesExt};
+use datafusion_common::DataFusionError;
+use std::io::Cursor;
+use wkb::reader::Wkb;
+use wkt::types::Dimension;
+
+/// Serializes the geometry held by `wkb` into `builder`.
+///
+/// The leading WKB byte-order marker is validated first; the rest of the
+/// buffer is handed to [`write_geometry`]. When `epsg_crs` is `Some`, its
+/// value is embedded as the SRID of the output.
+///
+/// # Errors
+///
+/// Fails when the byte-order marker is neither `0` nor `1`, when the input is
+/// big-endian (not implemented), or when [`write_geometry`] fails.
+pub fn serialize(
+    wkb: &Wkb,
+    builder: &mut BinaryBuilder,
+    epsg_crs: Option,
+) -> datafusion_common::Result<()> {
+    let mut cursor = Cursor::new(wkb.buf());
+    ensure_little_endian(cursor.read_u8()?)?;
+
+    write_geometry::(builder, &mut cursor, epsg_crs)
+}
+
+/// Accepts the little-endian WKB byte-order marker (`1`) and rejects
+/// everything else: `0` (big-endian) is reported as not implemented, any
+/// other value as invalid.
+fn ensure_little_endian(byte_order: u8) -> datafusion_common::Result<()> {
+    match byte_order {
+        1 => Ok(()),
+        0 => Err(DataFusionError::Internal(
+            "BigEndian WKB serialization not implemented".to_string(),
+        )),
+        _ => Err(DataFusionError::Internal(
+            "Invalid byte order in WKB".to_string(),
+        )),
+    }
+}
+
+/// Serializes one geometry, read from `cursor` starting at its WKB geometry
+/// type, into `builder`.
+///
+/// The output starts with a four-byte header: a preamble byte packing the
+/// geometry type (upper four bits), the coordinate type (always XY here) and
+/// an SRID-present flag (lowest bit), followed by the SRID as three bytes,
+/// most significant first (all zero when `epsg_crs` is `None`). The
+/// type-specific body follows. Members of a geometry collection are
+/// serialized recursively, each with its own header.
+///
+/// # Errors
+///
+/// Fails when the geometry type is outside `1..=7`, when a collection member
+/// is not little-endian, or when the input ends prematurely.
+pub fn write_geometry(
+    builder: &mut BinaryBuilder,
+    cursor: &mut Cursor<&[u8]>,
+    epsg_crs: Option,
+) -> datafusion_common::Result<()> {
+    let geometry_type = cursor.read_u32::()?;
+    verify_geometry_type(geometry_type)?;
+
+    // Lossless: `verify_geometry_type` only lets 1..=7 through.
+    let wkb_byte = geometry_type as u8;
+
+    let preamble_byte: u8 = (wkb_byte << 4)
+        | (get_coordinate_type_value(Dimension::XY) << 1)
+        | u8::from(epsg_crs.is_some());
+
+    builder.write_u8(preamble_byte)?;
+
+    // The SRID slot is always three bytes wide; a missing SRID is stored as 0.
+    let srid = epsg_crs.unwrap_or(0);
+    builder.write_u8(((srid >> 16) & 0xFF) as u8)?;
+    builder.write_u8(((srid >> 8) & 0xFF) as u8)?;
+    builder.write_u8((srid & 0xFF) as u8)?;
+
+    match wkb_byte {
+        1 => serialize_point::(builder, cursor),
+        2 => serialize_linestring::(builder, cursor),
+        3 => serialize_polygon::(builder, cursor),
+        4 => serialize_multipoint::(builder, cursor),
+        5 => serialize_multilinestring::(builder, cursor),
+        6 => serialize_multipolygon::(builder, cursor),
+        7 => {
+            let number_of_geometries = cursor.read_u32::()?;
+            builder.write_u32::(number_of_geometries)?;
+            for _ in 0..number_of_geometries {
+                // Every member starts with its own byte-order marker; validate
+                // it rather than decoding a big-endian member as little-endian.
+                ensure_little_endian(cursor.read_u8()?)?;
+                write_geometry::(builder, cursor, epsg_crs)?;
+            }
+            Ok(())
+        }
+        _ => Err(DataFusionError::Internal(
+            "Geometry type not supported yet".to_string(),
+        )),
+    }
+}
+
+/// Checks that `geometry_type` is one of the seven plain WKB geometry types
+/// (`1..=7`); any other code is rejected.
+fn verify_geometry_type(geometry_type: u32) -> datafusion_common::Result<()> {
+    match geometry_type {
+        1..=7 => Ok(()),
+        _ => Err(DataFusionError::Internal(
+            "Unsupported geometry type".to_string(),
+        )),
+    }
+}
+
+/// Maps a coordinate dimension to the numeric code stored in the preamble
+/// byte: XY = 1, XYZ = 2, XYM = 3, XYZM = 4.
+fn get_coordinate_type_value(dimension: Dimension) -> u8 {
+    match dimension {
+        Dimension::XY => 1,
+        Dimension::XYZ => 2,
+        Dimension::XYM => 3,
+        Dimension::XYZM => 4,
+    }
+}
diff --git a/rust/sedona-serde/src/wkb.rs b/rust/sedona-serde/src/wkb.rs
new file mode 100644
index 000000000..d3b642ce4
--- /dev/null
+++ b/rust/sedona-serde/src/wkb.rs
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use byteorder::WriteBytesExt;
+
+/// Byte-order marker written in front of every WKB geometry this crate emits.
+pub(crate) const WKB_LITTLE_ENDIAN_MARKER: u8 = 1;
+
+/// Writes [`WKB_LITTLE_ENDIAN_MARKER`] to `writer` as a single byte.
+///
+/// # Errors
+///
+/// Propagates any I/O error reported by `writer`.
+pub fn write_wkb_byte_order_marker(writer: &mut W) -> std::io::Result<()> {
+    writer.write_u8(WKB_LITTLE_ENDIAN_MARKER)
+}
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 829474604..5736e9ed5 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -758,4 +758,68 @@ mod tests {
             .await
             .expect("should succeed because aws and gcs options were stripped");
     }
+
+    /// Decodes a Sedona Spark encoded point with `ST_GeomFromSedonaSpark` and
+    /// checks its WKT, the SRID taken from the CRS argument, and the bytes
+    /// produced by encoding it again with `ST_GeomToSedonaSpark`.
+    #[tokio::test]
+    async fn test_sedona_spark_serde() -> Result<()> {
+        let ctx = SedonaContext::new();
+
+        let geometry_data = ctx.sql(
+            "SELECT
+                ST_AsText(
+                    ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+                ) AS geom"
+            )
+            .await?
+            .collect()
+            .await?;
+
+        // Header cells are padded to the column width, matching the borders.
+        assert_batches_eq!(
+            [
+                "+------------+",
+                "| geom       |",
+                "+------------+",
+                "| POINT(1 1) |",
+                "+------------+",
+            ],
+            &geometry_data
+        );
+
+        let srid_value = ctx.sql(
+            "SELECT
+                ST_SRID(
+                    ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+                ) AS srid"
+            )
+            .await?
+            .collect()
+            .await?;
+
+        assert_batches_eq!(
+            ["+------+", "| srid |", "+------+", "| 4326 |", "+------+",],
+            &srid_value
+        );
+
+        let from_sedona_spark_and_reverse = ctx.sql(
+            "SELECT
+                ST_GeomToSedonaSpark(
+                    ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+                ) AS sedona_bytes"
+            )
+            .await?
+            .collect()
+            .await?;
+
+        assert_batches_eq!(
+            [
+                "+--------------------------------------------------+",
+                "| sedona_bytes                                     |",
+                "+--------------------------------------------------+",
+                "| 130010e601000000000000000000f03f000000000000f03f |",
+                "+--------------------------------------------------+",
+            ],
+            &from_sedona_spark_and_reverse
+        );
+
+        Ok(())
+    }
 }