From 5e37aeda1b28721b512aebf43071a3bcc9e63609 Mon Sep 17 00:00:00 2001 From: Matthew Powers Date: Mon, 15 Dec 2025 00:17:08 -0500 Subject: [PATCH 1/2] docs: add flatgeobuf page --- docs/flatgeobuf.ipynb | 194 ++++++++++++++++++++++++++++++++++++++++++ docs/flatgeobuf.md | 90 ++++++++++++++++++++ mkdocs.yml | 1 + 3 files changed, 285 insertions(+) create mode 100644 docs/flatgeobuf.ipynb create mode 100644 docs/flatgeobuf.md diff --git a/docs/flatgeobuf.ipynb b/docs/flatgeobuf.ipynb new file mode 100644 index 00000000..ae59bccd --- /dev/null +++ b/docs/flatgeobuf.ipynb @@ -0,0 +1,194 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "64d209be", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "# SedonaDB + FlatGeobuf\n", + "\n", + "This page explains how to read FlatGeobuf files with SedonaDB.\n", + "\n", + "FlatGeobuf is a cloud-optimized binary format for geographic vector data designed for fast streaming and spatial filtering over HTTP.\n", + "\n", + "It has a built-in spatial index, is easily compactible, contains CRS information, and is supported by many engines.\n", + "\n", + "The examples on this page show you how to query FlatGeobuf files with SedonaDB over HTTP." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a746c47d", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "87c9bf67-cb6c-445c-8199-727bacbb412e", + "metadata": {}, + "source": [ + "# Read Microsoft Buildings FlatGeobuf data with SedonaDB\n", + "\n", + "The Microsoft buildings dataset is a comprehensive open dataset of building footprints extracted from satellite imagery using computer vision and deep learning.\n", + "\n", + "Here's how to read the Microsoft buildings dataset into a SedonaDB DataFrame and print a few rows." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "397ef4cf", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌─────────────────────────────────┐\n", + "│ wkb_geometry │\n", + "│ geometry │\n", + "╞═════════════════════════════════╡\n", + "│ POINT(-97.16154292 26.08759861) │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ POINT(-97.1606625 26.08481) │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ POINT(-97.16133375 26.08519809) │\n", + "└─────────────────────────────────┘\n" + ] + } + ], + "source": [ + "url = \"https://github.com/geoarrow/geoarrow-data/releases/download/v0.2.0/microsoft-buildings_point.fgb.zip\"\n", + "df = sd.read_pyogrio(url)\n", + "df.show(3)" + ] + }, + { + "cell_type": "markdown", + "id": "120e8f67-8914-4545-8f31-d38d5b6d6e7e", + "metadata": {}, + "source": [ + "You can see that the Microsoft Buildings dataset contains the building centroids.\n", + "\n", + "Take a look at the schema and see how it contains the `wkb_geometry` column and the CRS." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5f4256d2-3ecb-41d1-839b-1deeb22a3600", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SedonaSchema with 1 field:\n", + " wkb_geometry: geometry" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.schema" + ] + }, + { + "cell_type": "markdown", + "id": "f24f6b0d-44af-403d-b64e-6c745612f8b8", + "metadata": {}, + "source": [ + "Now let's see how to read another FlatGeobuf dataset." 
+ ] + }, + { + "cell_type": "markdown", + "id": "d30ab78a-3692-48ea-836c-ed31d497a5fd", + "metadata": {}, + "source": [ + "# Read Vermont boundary FlatGeobuf data with SedonaDB\n", + "\n", + "The Vermont boundary dataset contains the polygon for the state of Vermont.\n", + "\n", + "The following example shows how to read the Vermont FlatGeobuf dataset and plot it." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "81b0558f", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAARQAAAGsCAYAAAAc6VhhAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMUtJREFUeJzt3Ql0FFW6B/AvSWffN7IREhJCImQHCbvMJM8QMojgIGBkm0EGJp6BQQYFkcUNdJTHqAgMB8QFRXwyOI4IArIMymYQCbIvkhAggSAJCWSvd+6VbrtDZ62qrq6u/++cPqS7b7pvmvQ/9373VrWdIAgCAQBIwF6KBwEAYBAoACAZBAoASAaBAgCSQaAAgGQQKAAgGQQKAEgGgQIAkkGgAIBkECgAIBmbC5Q9e/bQ0KFDKTQ0lOzs7GjTpk1tfgx2NMJrr71GXbt2JWdnZwoLC6OXXnpJlv4C2BId2ZjKykpKSkqiP/zhDzRixIh2Pca0adPoq6++4qGSkJBAN27c4BcAaJ6dLR8cyEYo//rXv+jhhx823FZdXU3PPvssffTRR3Tz5k2Kj4+nV155hQYNGsTvP3HiBCUmJtKxY8coNjZWwd4DqI/NTXla8uSTT9K+ffto/fr1dPToURo5ciQNHjyYzpw5w+///PPPKSoqiv7zn/9Q586dKTIykiZNmoQRCkAraCpQCgoK6J133qFPPvmEBgwYQNHR0TRz5kzq378/v505f/48Xbx4kbd57733aO3atZSXl0e///3vle4+gNWzuRpKc/Lz86m+vp4XW42xaZC/vz//uqGhgV9nYaJvt3r1aurRowedOnUK0yCAZmgqUCoqKsjBwYGPONi/xjw8PPi/ISEhpNPpTELnvvvuM4xwECgATdNUoKSkpPARSklJCZ/ymNOvXz+qq6ujc+fO8SkRc/r0af5vRESERfsLoDY2t8rDRiFnz541BMiSJUvoN7/5Dfn5+VGnTp3o8ccfp2+++YZef/11fv+1a9dox44dfGUnOzubT3nuv/9+PmJZunQpv56bm0teXl58KRkAmiHYmJ07d7KAvOcyfvx4fn9NTY0wb948ITIyUnB0dBRCQkKE4cOHC0ePHjU8RlFRkTBixAjBw8NDCAoKEiZMmCCUlpYq+FMBqIPNjVAAQDmaWjYGAHkhUABAmVWeBQsW0MKFC01uY8uoJ0+ebPJ72Aax5557jn766SeKiYnh29yHDBliuJ/NuObPn0+rVq3iW+HZKsvy5ct529ZihdPLly+Tp6cn324PANJh79Fbt27xA27t7VsYg7Sl4DJ//ny
he/fuwpUrVwyXa9euNdn+m2++ERwcHIRXX31VOH78uDB37lxeCM3Pzze0Wbx4seDt7S1s2rRJ+OGHH4SHHnpI6Ny5s3Dnzp1W96uwsNBsIRYXXHAhyS7sfSZpUZaNUNjpAI4cOdKq9qNGjeJH/7LjYvR69+5NycnJtGLFCp58LPWeeuopvgWeKSsro6CgIL7lffTo0a16HvY9Pj4+VFhYyJd3AUA65eXlFB4ezmcQ3t7e0m5sYwfRsRBwcXGhPn360KJFi/j+DnPYQXgzZswwuS0zM9NwjpILFy7Q1atXKSMjw3A/63BaWhr/3qYChW2NZxc9NhxjWJggUADk0ZpyQpuKsuyNzkYOW7Zs4XUOFghsx6n+Dd0YCws22jDGrrPb9ffrb2uqjTksxFjw6C8sPQFAeW0KlKysLH64P9tVykYamzdv5sOgDRs2kCXNnj2bT3P0FzbVAQCVLxuzugU7iE6/1b2x4OBgKi4uNrmNXWe36+/X39ZUG3PYaRn10xtMcwBsJFDYcTPsIDp2hK45rMbCjpMxtm3bNn47w05gxILDuA0rAB04cMDQBgBUpNVrs4IgPPXUU8KuXbuECxcu8CXhjIwMISAgQCgpKeH3jx07VnjmmWdMlo11Op3w2muvCSdOnODLzuaWjX18fITPPvuMH08zbNiwNi8bl5WV8WUt9i8ASKst7682rfJcunSJxowZQ6WlpRQYGMjPdLZ//37+tf58IcYbX/r27UsffvghzZ07l+bMmcM3q7EVHnYeV71Zs2bxpeXJkyfzegx7TFb0ZatIAKAuNnFwIJsmsdUeVqBFPQVAufcXjuUBAMkgUABAMggUAJAMAgUAJINAAQDJIFAAQDKa+hgNAFvT0CBQdV0DVdfVU1XtL/+y61W1v/xbXWv0tZk2SeE+9JvYDpL1B4ECIBLbylVTz96od9+8/E1r/KZu+g19T9vmgoHdpr/v7vex5xVjyaNJJCUECtiMunr2pvv1Dfzrm9T0zW22jbm2xm9ukze0aRv2r1q3h/aNDpD08RAoYFVDcHN/jY3bNBcW9Q0qfVcrJCrQnYK9pT3EBYGixSG48dDZ6Pb2D8F/faOLHYKD5fSN9pf8MREo1joEb2LubetDcLCcfhJPdxgEyt2/6Jvzr4oagpsLCQzBwVqx08P2jsIIRRa3a+op98PDSncDwGK6hXiRr7uT5I+LjW1EVFldp3QXACyqXxfppzsMAoWdyhKBAhrTR4aCLINAQaCAxujs7ahXpJ8sj41AQaCAxiSH+5C7szzlUwQKr6HUK90FAIvpK1P9hEGgoCgLGtNXpvoJg0Bhn42MQAGNcHG0p5ROPrI9PgIFIxTQkPsj/chZ5yDb4yNQECigIX1l2G5vDIHCpjxVCBTQhn5d5KufMAgUjFBAI7xcdNQ91FvW50CgsECpQaCA7esd5U8O9nayPgcChW9swz4UsH19ZVwu1kOgsECpqlW6CwCqPSDQGAIFO2VBAwI9nalLBw/ZnweBgmN5QCPTHTt2ViWZIVAQKKAB/WTef6Kn+UBhp3/EsjHYuj4WKMgymg8Udg7YOpz7FWxYJz83Cvdzs8hzaT5QMDoBW9fXQqMTRvOBgvoJ2Lq+Flgu1kOgIFDAxvWR4eMymqL5QMEeFLBlsUGefA+KpWg+UCqqsUsWbFcfC9ZPGAQKRihgw/pZsH7CaD5QsMoDtsrejigtys+yz0kah0ABW5XQ0Ye8XBwt+pyaDxScrQ1sVT8L108YzQfK9YpqpbsAoMrzx5qj+UA5drlc6S4ASM7JwZ56RvqSpWk6UGrrG+jEFQQK2J7UCB9ycZTv4zKaoulAOV18i2rqGpTuBoBqT1fQmKYD5VhRmdJdAJBFjwjLT3dI64GSj0ABG7XvfKkiz6vxQEH9BGzTRwcLFZnOazZQUJAFW98O8eWxKxZ/Xs0GypniChRkwaZ9sP+ixZ9Ts4GCgizYukM//WzxUbhmAwUFWdCC9/ZZdpSCQAGwYZu+L6K
yO5Y7548mA6UOBVnQiDu19bTx8CV1BMrixYv5p5FNnz69yTa1tbX0/PPPU3R0NLm4uFBSUhJt2bLFpM2CBQv44xhf4uLiSC5nSir4x2cAaMH7+y/yz5+yBF17v/HQoUO0cuVKSkxMbLbd3Llz6YMPPqBVq1bxkNi6dSsNHz6cvv32W0pJSTG06969O23fvv3Xjuna3bUWYboDWnL+WiV9c7aU+sdY6YelV1RUUE5ODg8JX9/mt/i+//77NGfOHBoyZAhFRUXR1KlT+devv/66STsWIMHBwYZLQIB8PzxWeEBr3tv3k0Wep12BkpubS9nZ2ZSRkdFi2+rqaj7VMebq6kp79+41ue3MmTMUGhrKQ4eFVUFBQbOPWV5ebnJpC4xQQGu2nyimopt3rC9Q1q9fT4cPH6ZFixa1qn1mZiYtWbKEB0ZDQwNt27aNNm7cSFeu/LqLLy0tjdauXctrK8uXL6cLFy7QgAED6NatW2Yfkz23t7e34RIeHt6mguxxnAMFNKZBIProQNN/pBUJlMLCQpo2bRqtW7funlFHU/7xj39QTEwMr584OTnRk08+SRMnTiR7+1+fOisri0aOHMnrMSyANm/eTDdv3qQNGzaYfczZs2dTWVmZ4cL61VooyIJWrT9UQNV19dYTKHl5eVRSUkKpqam85sEuu3fvpjfeeIN/XV9/b2cDAwNp06ZNVFlZSRcvXqSTJ0+Sh4cHn9o0xcfHh7p27Upnz541e7+zszN5eXmZXFrr58qaVrcFsCXXK2poy7Gr1hMo6enplJ+fT0eOHDFcevbsyWse7GsHh6bPEMVGNGFhYVRXV0effvopDRs2rNmi77lz5ygkJISk5uvuJPljAqjFezLvnG3T2qynpyfFx8eb3Obu7k7+/v6G28eNG8eDQ19jOXDgABUVFVFycjL/l+05YbWUWbNmGR5j5syZNHToUIqIiKDLly/T/PnzeTiNGTOGpObrhkAB7cq7+DP9eLmMuod6q2OnLFudMS64VlVV8b0o3bp14/tPWNiwFR42rdG7dOkSD4/Y2Fh69NFHeUDt37+fT5ek5uNm2c8pAdDSUch2gqW20MmILRuz1R5WoG1NPaX7vC1UWYOPIAVtcnV0oP1z0snb1VHy95cmj+VBHQW0fnzP/+XJc3yPNgMFdRTQuH3n5DnnrDYDBSMU0LiYIA9ZHleTgeKHwixoXFywpyyPq8lA8cGUBzSuW0jrN4O2hSYDxQ9THtD45x53DnCX5bE1GSiooYDW6yc6B3ne+poMFD9MeUDD7pNpuqPZQPFFURY0LE6mgqx2AwVTHtCwbhihSAtFWdCyOASKtHCAIGhVkJezrH9QNRkozjoHcndq+twtALYqLli+0YlmA4VBHQW0KC5EvoKspgMFdRTQom4y1k80HSg44hi0KA5THnlgLwpocct9VKA8W+71tBsomPKAxnTp4EGOMm25J60HCrbfg9bcJ3P9RNOBghEKaM0DsdKf9F3Ux2jYEhRlQSu8XHS0dHQy/TYuSPbn0m6guKMoC9qY5qx4PJUi/OUtxpLWAwX7UMDWDU8Jo5eHJ5CrBXeFazdQMOUBG6Wzt6N5Q7vR2N4RZGdnZ9nnJo3CeWXBVg/+ezsnlXpE+Cny/JoNFCedPXk466iiuk7prgBIoldnP3rrsRTq4OlCStFsoOgLswgUsAWT+nemp7PiZN+41hJNBwqroxTeuKN0NwDazc3JgV55JJGGJoWSNdB0oKCOAmrWOcCdVo7tQV2D5D0lQVtoOlCwdAxq9WC3IHrt0STycrGu/VSaDhTslgW1sbcjmpkZS1MGRpM9u2JlNB0oftgtCyo75cabY1Kpf0wAWStNBwpqKKAWiR29+f6Sjr5uZM00HSiooYAajL4/nBY81J1cHK3/xOqaDhTUUMDaz7D2/LDuNLpXJ1ILTQcKRihgrcJ8XPkUJynch9RE04GC88qCNerfJYDeGJOiyj94mg4UFGXBmkQHulNOWgSN7xtJDla
4JNwamg4UdoBgiLcLXSmrUroroFGujg6UnRjCC689InwtfroBqWk6UJgBMQG04btLSncDNCapozc9en84PwbH2na7iqH5QHmgawcECljs3K4jUjvSoz3DqVuo/GegV4LmA4UVwNh0tUFQuidgq/pE+dPoXuGU2T1YFXtJxNB8oHi7OVJyuA8dLripdFfAhnTwdKaRPX8ZjVjqBNHWQPOBop/2IFBALAd7O/pNbAdeYB0UG0g6hU92pAQEyt0PQPrf7aeV7gaoVIS/Gx+J/L5HRwryUu70i9YAgUJECWHefJPbz7drle4KqMxvYgNp9fj7rfJUAkrQ3pisiaHqgBj5P6YRbE/PSD+EiREEyl0PdEWgQNsldVTXsTZyQ6DcNaCr9Z60BqxXQkdvpbtgVRAod7HPMuluo5uNQL6TRHu72s4uVykgUIwMxLQH2rh9HkwhUIygjgJtkYj6yT0QKEZSO/nyjycFaI2kcIxQGkOgNDqdQd9of6W7ASrZatAtBIEiaaAsXryYn79h+vTpTbapra2l559/nqKjo8nFxYWSkpJoy5Yt97RbtmwZRUZG8jZpaWl08OBBUmrXLEBL2Kf1uTrZ9oF+Fg2UQ4cO0cqVKykxMbHZdnPnzuXt3nzzTTp+/DhNmTKFhg8fTt9//72hzccff0wzZsyg+fPn0+HDh3noZGZmUklJCVnaQGxwg1ZIxnRHukCpqKignJwcWrVqFfn6+jbb9v3336c5c+bQkCFDKCoqiqZOncq/fv311w1tlixZQk888QRNnDiRunXrRitWrCA3Nzdas2YNWVq4nxs/FR9Ac1CQlTBQcnNzKTs7mzIyMlpsW11dzacxxlxdXWnv3r3865qaGsrLyzN5LHt7e3593759TT5meXm5yUXqo48BWvrgLZAgUNavX8+nJYsWLWpVezZ1YSOQM2fOUENDA23bto02btxIV65c4fdfv36d6uvrKSgoyOT72PWrV6+afUz23N7e3oZLeHg4SWkgds1CM5x19ryGAiIDpbCwkKZNm0br1q27Z9TRlH/84x8UExNDcXFx5OTkRE8++SSf2rBRSHvNnj2bysrKDBfWLyn1jvLnvzQA5sQGe5KjBs910hptelXY1IQVSlNTU0mn0/HL7t276Y033uBfs5FGY4GBgbRp0yaqrKykixcv0smTJ8nDw4PXU5iAgABycHCg4uJik+9j14ODg832w9nZmby8vEwuUmKn6QvzdZX0McF2qPHzcqwyUNLT0yk/P5+OHDliuPTs2ZMXaNnXLBiawkY0YWFhVFdXR59++ikNGzaM385GLT169KAdO3YY2rKpEbvep08fUkJB6W06f61SkecG64fjd5rWpm2hnp6eFB8fb3Kbu7s7+fv7G24fN24cDw59jeXAgQNUVFREycnJ/N8FCxbwwJg1a5bhMdiS8fjx43k49erVi5YuXcpHNGxqpIQvj/1S3wEwB4HSNMn3mRcUFJjUR6qqqvhelPPnz/OpDlsyZkvJPj6/LruNGjWKrl27RvPmzeOFWBY+bPNb40KtpWw+Zr4YDMAgUJpmJwiC6j9Agi0bs9UeVqAVW0+59PNt6v/KTsn6BrZnbvZ9NGnALzVALShvw/sLpepGtmB0Ai3wwgilSQiURr5EoEALbOmjQ6WGQDFytayK8i7+rHQ3wMqhhtI0BIqRLVjdgVZAoDQNgWIEqzvQ2o+vBfMQKHeV3KqiQz/dULoboAIYoTQNgXLX1h+LSf0L6GCJM7W548RKTUKg3PVlPuon0DIvFx0/SyGYh0AhotKKatp/vlTpboAKYLrTPAQKEX11vJgaMN2BVkCgNA+BwlZ3MN2BVsIu2eZpPlB+rqyhb89hugOtgxFK8zQfKNtOFFM95jvQSgiU5mk+ULC6A22BKU/zNB0oZXdqae/Z60p3A1QEI5TmaTpQdpwoptp6THeg9RAozdN0oGzOx7E70DYIlOZpNlBuVdXSnjPXlO4GqAwCpXmaDZSvT5ZQTV2
D0t0AFbG3I4oLxgd8NUezgfIlpjvQRmmd/cnfw1npblg1TQZKZXUd7TxVonQ3QGWyEsx/8BxoPFB2nbpG1ZjuQBtldkegtESTgbIZp3qENuoR4UtBXq37PG8t01yg3Kmpp50nMd2BtsmKx+ikNTQXKLtPX6PbNfd+qDtAczDdaR3NBcr2E8VKdwFUJiHMm8L93JTuhipoLlA6+roq3QVQmcGY7rSa5gJlQEyg0l0AlUGgtJ7mAiWpozd5uuiU7gaoRNcgD4oO9FC6G6qhuUDROdhTv+gApbsBKjE4PkTpLqiK5gKFGdgV0x5oHSwXt40mA2VADEYo0LJIfzccDNhGmgwUtgTYOcBd6W6ACqY7+FCvttFkoDAYpUBLMN1pOw0HCuoo0LQwH1dK7OitdDdUR7OB0ifan3TsjDkATWy1x3Sn7TQbKB7OOkqN8FW6G2ClcO6T9tFsoDADUUcBMwI8nCm1E/7YtIemAwV1FDAns3sQOWA63C6aDpT4MG/yccNZzMFUFnbHtpumA4X9FerfBdMe+BX7A5MW5ad0N1RL04HCDMS0B4z8z31B5Oig+bdFu2n+leuPwiwYweqOOJoPlFAfV+rSAYenA5Gns476YQosiuYDhcE2fGB+e18HctY5KN0NVUOgoI4Cd+HYHfEQKOwjJqP8yAmFOE1zdXSgB7p2ULobqod3ERG5OemoZyR2RmrZoNhAcnXCdEcsBMpd2DWrbTgRtTQQKHehMKtdbLr72zhMd6SAQLmrW4gX+bs7Kd0NUGgvkqcLDsGQAgLlLnt7O4xSNArTHekgUIygjqLN47nYdnuQBgLFCEYo2jOuTwT5YqorGQSKkQ5eLvjYBA0J9Xahpx6MVbobNkVUoCxevJifd3P69OnNtlu6dCnFxsaSq6srhYeH01//+leqqqoy3L9gwQL+OMaXuLg4UgJGKdrx/LB4fipQkE67X81Dhw7RypUrKTExsdl2H374IT3zzDO0Zs0a6tu3L50+fZomTJjAQ2PJkiWGdt27d6ft27f/2jGdTrFPFVz13wuKPDdYzpCEYMrohtqJ1Nr1rq2oqKCcnBxatWoVvfjii822/fbbb6lfv3702GOP8euRkZE0ZswYOnDggGlHdDoKDla+2n5/pB856+ypuq5B6a6ATDxddLRgaHelu2GT2jXlyc3NpezsbMrIyGixLRuV5OXl0cGDB/n18+fP0+bNm2nIkCEm7c6cOUOhoaEUFRXFw6qgoKDJx6yurqby8nKTi1RcHB2oV2ecscuWPZMVx+tlYAUjlPXr19Phw4f5lKc12Mjk+vXr1L9/fxIEgerq6mjKlCk0Z84cQ5u0tDRau3Ytr7NcuXKFFi5cSAMGDKBjx46Rp+e9RdJFixbxNnIeffzfM9dle3xQTs8IXxpzfyelu2Gz2jRCKSwspGnTptG6devIxaV1Cb9r1y56+eWX6e233+ZBtHHjRvriiy/ohRdeMLTJysqikSNH8npMZmYmH8HcvHmTNmzYYPYxZ8+eTWVlZYYL65eUBnRFYdYWOTrY0aIRCXwTI1jBCIVNXUpKSig1NdVwW319Pe3Zs4feeustPhVxcDA9YvO5556jsWPH0qRJk/j1hIQEqqyspMmTJ9Ozzz5L9vb3ZpqPjw917dqVzp49a7Yfzs7O/CKX2CBP6uDpTCW3qmV7DrC8qYO6UEwQtgVYzQglPT2d8vPz6ciRI4ZLz549ec2Dfd04TJjbt2/fExr6dmwK1FTR99y5cxQSoszHGbAVKOyatS1Rge7050HRSnfD5rVphMLqGfHx8Sa3ubu7k7+/v+H2cePGUVhYGK9zMEOHDuXLwykpKbxWwkYdbNTCbtcHy8yZM/n1iIgIunz5Ms2fP5/fx1aDlMLOj/Lp4UuKPT9I6+XhCbzgDvKSfLMHW50xHpHMnTuX/8Vn/xYVFVFgYCAPj5deesnQ5tKlSzw8SktL+f2sgLt//37+tVJ+vFym2HODtEbfH069o/yV7oYm2Al
NzTtUhC0be3t78wKtl5eX6MdjL0m/xV/T5bJfd/OCOgV4ONGOGYPIG58QaZH3F47lMePk1VsIExsxoW8kwsSCEChmfH2yROkugESGJYcp3QVNQaCYseNEsdJdAAn0ivSjcD83pbuhKQiURkorqun7wptKdwMkMDwVoxNLQ6A0suvUNVJ/mRrYiaeHxCuzj0nLECiNoH5iG9Lv64BirAIQKEZq6xtoz+lrSncDJDA8BdMdJSBQjBz66Qbdqq5Tuhsgko+bIw2KxefsKAGBYuTrE5ju2ILfJYaQkw6/2krAq24E9RPbMDylo9Jd0CwEyl3nr1XQ+euVSncDRIrwd6PUTj5Kd0OzECh3YXRiGx5ODuMHo4IyECh3IVBsA1Z3lIVAYUdTVtXSwQs3lO4GiJTSyYciA9yV7oamIVCI6L+nr1NdA7bHqt0IjE4Uh0BhBwOexMGAaqezt6PfJYYq3Q3N03yg1DcItPsUdseqHdvIhg89V57mA+WHSzeptLJG6W6ASCNwZLFV0Hyg7MTqjk18tOhv47DV3hpoPlDCfXECHrXLTgjBGe2thOYDZUhiCLnil1HVsPfEemg+UDycdZQVH6x0N6Cdwnxc6f5IfLi9tdB8oDC/74GDydTq4ZRQfFaxFUGgEPEPgWJ/6UB9cGSxjX9yoBqxv3D/OyqZjl8uo8qaeqqorqPb1XVUUV1PldV1VFnDvq775evqesPX2F2rrMSO3tSlg4fS3QAjCJS7enX245e2fLpgdV0DD5bPjlym5/9zXNb+gfkji8G6IFDaiR0iz5Yq2cXbFSdDtjQHezt6KBlb7a0NaigSqGtoULoLmjMwJoACPJyV7gY0gkCRAGoplvcw9p5YJQSKRAcYgmU92A17h6wRAkUCtfUIFEsfu+PqhN3N1giBIoF61FAsytcNpymwVggUCaCGYlm++IhRq4VAkUAdpjwW5Y0RitVCoEgAIxTLwgjFeiFQJIAaimWhhmK9ECgSwAjFsrAz2XohUCSAGoplYcpjvRAoEsDGNsvywZTHaiFQJIBjeSzLByMUq4VAkQCmPJaFoqz1QqBIAEVZy8IIxXohUCSAGoploYZivRAoEsAIxbInVvJywXnBrBUCRQJ19SjKWnIPCjtbHlgnBIoEMEKxHNRPrBsCRQKooVgOVnisGwJFArWY8liMD7bdWzUEigQwQrEcrPBYNwSKBFBDsRzUUKwbAkUCWOWxHBwYaN0QKBLAlMdyMOWxbggUCWDKYzmY8lg3BIoEMEKxHCwb23CgLF68mO9anD59erPtli5dSrGxseTq6krh4eH017/+laqqqkzaLFu2jCIjI8nFxYXS0tLo4MGDpBZYNrYcjFBsNFAOHTpEK1eupMTExGbbffjhh/TMM8/Q/Pnz6cSJE7R69Wr6+OOPac6cOYY27PqMGTN4m8OHD1NSUhJlZmZSSUkJqQFGKJaDGooNBkpFRQXl5OTQqlWryNfXt9m23377LfXr148ee+wxPgJ58MEHacyYMSYjkCVLltATTzxBEydOpG7dutGKFSvIzc2N1qxZQ2qAGorlYJXHBgMlNzeXsrOzKSMjo8W2ffv2pby8PEOAnD9/njZv3kxDhgzh12tqavj9xo9lb2/Pr+/bt8/sY1ZXV1N5ebnJRUk4wZJlODnYk6sjPoLUmrX5OPD169fzaQmb8rQGG5lcv36d+vfvT4IgUF1dHU2ZMsUw5WH31dfXU1BQkMn3sesnT540+5iLFi2ihQsXkrXACMVy9RMcaWxDI5TCwkKaNm0arVu3jhdPW2PXrl308ssv09tvv82DaOPGjfTFF1/QCy+80N4+0+zZs6msrMxwYf1SEj6XxzKwwmNjIxQ2NWGF0tTUVMNtbHSxZ88eeuutt/hUxMHBdEj63HPP0dixY2nSpEn8ekJCAlVWVtLkyZPp2WefpYCAAP49xcXFJt/HrgcHB5vth7OzM79YC4xQLMMb9RPbGqGkp6dTfn4+HTlyxHDp2bMnL9C
yrxuHCXP79m1eEzGmb8emQE5OTtSjRw/asWOH4f6GhgZ+vU+fPqQGqKFYBgqyNjZC8fT0pPj4eJPb3N3dyd/f33D7uHHjKCwsjNc5mKFDh/JVnJSUFL6/5OzZs3zUwm7XBwtbMh4/fjwPp169evF9K2wUw1Z91ADLxpaBKY/1k/zknAUFBSYjkrlz5/JCGvu3qKiIAgMDeZi89NJLhjajRo2ia9eu0bx58+jq1auUnJxMW7ZsuadQa63wuTyWgSmP9bMT2LxD5diysbe3Ny/Qenl5WfS5GxoEipqz2aLPqVXLHkul7MQQpbuhOeVteH/hWB6RUJC1DGedPQ2KDVS6G9ACBIpIqJ9YxgNdA8ndGR+fYe0QKCLVon5iEVkJ5rcQgHVBoIhUjyVj2Tk62NFv49RRoNc6BIpIqKHIr1+XAP4BX2D9ECgioYYiv6x4THfUAoEiEk6uJP9nGf9PNwSKWiBQRMIIRV69o/zIzx07ZNUCgSISaijy6tGp+RN4gXVBoIiEbffycnXC3hM1QaCIhCON5eXqiF9RNcH/lkioocjLBad8VBUEikioocjL1QmBoiYIFJHwucbyctYhUNQEgSISpjzywghFXRAoImHKIy8XHX5F1QT/WyJh2VheGKGoCwJFJCwbywsf7KUuCBSRUEORF5aN1QWBIlItAkVWCBR1QaCIhE8NlJcLdsqqCv63REINRV4YoagLAkUk1FDko7O3I0cH/IqqCf63REINRT5Y4VEfBIpI9dh6LxsX7EFRHQSKSNgpKx8UZNUH/2MiIVDkgymP+iBQREJRVj5Y4VEfBIpIWDaWDwJFfRAoImFjm3ww5VEfBIpIWDaWD4qy6oP/MZFQQ5EPRijqg0ARCTUU+aCGoj4IFJFwgiX5IFDUB4EiEvahyAeBoj4IFJHqMeWRDWoo6oNAEQkjFPm4OuHXU23wPyYSaijywZRHfRAoImGEIh8EivogUERCDUU+CBT1QaCIhCmPfFCUVR8EikiY8sgHW+/VB/9jImHrvXwwQlEfBIpItTgFpGxQQ1EfBIpIGKHIB4GiPggUkVBDkQ8+KF19ECgiYYQi32fyeLnolO4GtBECRaRa7EORRWb3YPJ0cVS6G9BGCBSRcApIeeT07qR0F6AdECgioYYivehAd+oT5a90N6AdECgi4Yxt0nu8dwTZ2dkp3Q1oBwSKSCjKSr+ZbURqR6W7Ae2EQBEJx/JIa1hyKHm7ohirVggUkTBCkX66A+qFQBEJy8bSSQ73ofgwb6W7AUoFyuLFi3nxbPr06U22GTRoEG/T+JKdnW1oM2HChHvuHzx4MKkBRijSwehE/dq9FfHQoUO0cuVKSkxMbLbdxo0bqaamxnC9tLSUkpKSaOTIkSbtWIC88847huvOzs6kBqihSMPHzZF+lxiidDdAiUCpqKignJwcWrVqFb344ovNtvXz8zO5vn79enJzc7snUFiABAcHk9pg2VgaI3t0xMGAWp3y5Obm8ilLRkZGm7939erVNHr0aHJ3dze5fdeuXdShQweKjY2lqVOn8pFMU6qrq6m8vNzkogRBELCxTSKPpWG6o8kRChthHD58mE952urgwYN07NgxHiqNpzsjRoygzp0707lz52jOnDmUlZVF+/btIweHe/9qLVq0iBYuXEhKQ5ZIY0BMAHUOMP0DAxoIlMLCQpo2bRpt27aNXFxc2vxkLEgSEhKoV69eJrezEYseu5/VZaKjo/moJT09/Z7HmT17Ns2YMcNwnY1QwsPDydJwciVpoBir0SlPXl4elZSUUGpqKul0On7ZvXs3vfHGG/zr+vr6Jr+3srKSj27++Mc/tvg8UVFRFBAQQGfPnjV7P6u3eHl5mVyUgBUe8UK8XSg9roPS3QAlRihstJCfn29y28SJEykuLo6efvpps9MTvU8++YTXPh5//PEWn+fSpUu8hhISYt1Vf9RPxBvTqxPpHLAdSpOB4unpSfHx8Sa3seKqv7+/4fZx48ZRWFgYr3M
0nu48/PDDvG3jFSNWD3nkkUf4Kg+rocyaNYu6dOlCmZmZZM0wQhF/EqXR91t+qgrykfyUWAUFBWRvb/oX59SpU7R371766quv7mnPRjVHjx6ld999l27evEmhoaH04IMP0gsvvGD1e1HqUEMRfRKlDl5tr8WBDQcKK5w2d51hS8FsidUcV1dX2rp1K6kRpjziYCOb7cHkVQRMecRZd6CgyT80oE4IFBGwbCzO3rPX6fOjV5TuBkgIgSICRijivfCf41ReVat0N0AiCBQRUEMR79qtalry1WmluwESQaCIgBGKNN7b9xMdKypTuhsgAQSKCKihSIPl8txNx6gBAa16CBQRMEKRzpHCm7T+UKHS3QCRECgioIYirVe2nKTrFdVKdwNEQKCIgJMrSavsTi0t/vKk0t0AERAoIuD0j9L7v7xLdPDCDaW7Ae2EQBEBNRR5zN2Uj4K3SiFQRMBHaMjjdHEFrdl7QeluQDsgUETACEU+S7efocs37yjdDWgjBIoIqKHI505tPS38/EeluwFKnw9FS3pG+tHynFQyHqfoD54V7t5qfDCt/svGR9iatjH9vl8fz7j93TaNH9jc97fmOcx0pPH3Cc3d18TP1frXw7SN8W1s5QefdaweCBQRwnxc+QUAfoEpDwBIBoECAJJBoACAZBAoACAZBAoASAaBAgCSQaAAgGQQKAAgGQQKAEgGgQIAkkGgAIBkECgAIBkECgBIBoECAJKxidMX6M/DUV5ernRXAGyO/n1l7nw3Nhkot27d4v+Gh4cr3RUAm8XeZ97e3s22sRNaEztWrqGhgS5fvkyenp5kZ2dH1pz0LPQKCwvJy8uLtAqvA6nqNWARwcIkNDSU7O3tbX+Ewn7Ijh07klqwXyBr/yWyBLwOpJrXoKWRiR6KsgAgGQQKAEgGgWJBzs7ONH/+fP6vluF1IJt9DWyiKAsA1gEjFACQDAIFACSDQAEAySBQAEAyCJRGioqK6PHHHyd/f39ydXWlhIQE+u677wz3T5gwge/GNb4MHjzY5DFu3LhBOTk5fMOSj48P/fGPf6SKigqTNkePHqUBAwaQi4sL3zH56quv3tOXTz75hOLi4ngb1o/Nmzeb3M/q6fPmzaOQkBDe14yMDDpz5oyonz8yMvKen49dcnNz+f1VVVX8a/b6eHh40COPPELFxcUmj1FQUEDZ2dnk5uZGHTp0oL/97W9UV1dn0mbXrl2UmprKVzm6dOlCa9euvacvy5Yt4/1hP39aWhodPHjQ5P7W9EWu12HQoEH33DdlyhSbex3ajK3ywC9u3LghRERECBMmTBAOHDggnD9/Xti6datw9uxZQ5vx48cLgwcPFq5cuWK4sO8zxu5PSkoS9u/fL/z3v/8VunTpIowZM8Zwf1lZmRAUFCTk5OQIx44dEz766CPB1dVVWLlypaHNN998Izg4OAivvvqqcPz4cWHu3LmCo6OjkJ+fb2izePFiwdvbW9i0aZPwww8/CA899JDQuXNn4c6dO+1+DUpKSkx+tm3btrFVQGHnzp38/ilTpgjh4eHCjh07hO+++07o3bu30LdvX8P319XVCfHx8UJGRobw/fffC5s3bxYCAgKE2bNnG9qw19XNzU2YMWMG/9nefPNN/rNu2bLF0Gb9+vWCk5OTsGbNGuHHH38UnnjiCcHHx0coLi42tGmpL2K09Do88MADvE/GbcrKymzudWgrBIqRp59+Wujfv3+zbVigDBs2rMn72S8G+8U7dOiQ4bYvv/xSsLOzE4qKivj1t99+W/D19RWqq6tNnjs2NtZw/dFHHxWys7NNHjstLU3405/+xL9uaGgQgoODhb///e+G+2/evCk4OzvzgJLKtGnThOjoaP587PFZqH3yySeG+0+cOMF/3n379vHr7I1jb28vXL161dBm+fLlgpeXl+HnnTVrltC9e3eT5xk1apSQmZlpuN6rVy8hNzfXcL2+vl4IDQ0VFi1aZPhZW+qLlIxfB32gsNuastlGX4e
WYMpj5N///jf17NmTRo4cyYeoKSkptGrVqnvasWEquz82NpamTp1KpaWlhvv27dvHpznscfTYVIQdb3TgwAFDm4EDB5KTk5OhTWZmJp06dYp+/vlnQxv2fcZYG3Y7c+HCBbp69apJG3a8BRsS69uIVVNTQx988AH94Q9/4EP6vLw8qq2tNXlONiXr1KmT4TnZv2x6FhQUZNJvdjDcjz/+2KqfjT0vey7jNuz1Y9f1bVrTF6k0fh301q1bRwEBARQfH0+zZ8+m27dvG+6zxdehNRAoRs6fP0/Lly+nmJgY2rp1Kw+Lv/zlL/Tuu+8a2rB6yXvvvUc7duygV155hXbv3k1ZWVlUX1/P72dvchY2xnQ6Hfn5+fH79G2Mf9EY/fWW2hjfb/x95tqItWnTJrp58yavG+mfk4UgC8zm+tXen4292e7cuUPXr1/nr2dLP39LfZFK49eBeeyxx3jI7Ny5k4fJ+++/z2tverb4OrSGTRxtLOVpENjI4uWXX+bX2Qjl2LFjtGLFCho/fjy/bfTo0Yb27C9QYmIiRUdH81FLeno62ZLVq1fzsGSHrWuZuddh8uTJJr8HrDCenp5O586d478PWoURihH2S9GtWzeT2+677z5erW9KVFQUH/aePXuWXw8ODqaSkhKTNqyyz1Z+2H36No2r8PrrLbUxvt/4+8y1EePixYu0fft2mjRpkuE29rhsGM7+WjfXr/b+bGxVjK1WsdfTwcGhxZ+/pb5IwdzrYA6bajLGvwe29Dq0FgLFSL9+/Xgdw9jp06cpIiKiye+5dOkSr6GwMGL69OnD/3PZ3Fbv66+/5qMf/S8da7Nnzx4+99Xbtm0br8n4+voa2rBplTHWht3OdO7cmf/CGLdhQ2VWp9G3EeOdd97hUze27KnXo0cPcnR0NHlO9nqxwNU/J/s3Pz/fJFRZv9mbRB/WLf1sbAjPnsu4DXv92HV9m9b0RQrmXgdzjhw5wv81/j2wpdeh1SxeBrZiBw8eFHQ6nfDSSy8JZ86cEdatW8eX9T744AN+/61bt4SZM2fy6vmFCxeE7du3C6mpqUJMTIxQVVVlsmyckpLCl5737t3L7zdeNmaVebZsPHbsWL5szJYG2fM0XjZmfXnttdd41X7+/Plml43ZEuJnn30mHD16lK8+iV021q8kdOrUia88NcaWKNl9X3/9NV+i7NOnD780Xi598MEHhSNHjvAl0MDAQLPLpX/729/4z7Zs2TKzy6VsxWrt2rV85Wzy5Mn8ZzVeNWmpL2I19TqwbQTPP/88f072e8Be/6ioKGHgwIE2+Tq0BQKlkc8//5z/IrD/xLi4OOGf//yn4b7bt2/zXxD2i8He3GzPCtsXYPyfy5SWlvIA8fDw4MuEEydO5GFkjO0bYUvU7HnCwsJ4ODS2YcMGoWvXrnwfAlte/OKLL0zuZ0uYzz33HA8n9jjp6enCqVOnRL8GbO8N+1tj7rFYWP35z3/my97szTB8+HC+B8PYTz/9JGRlZfG9NWzvxVNPPSXU1taatGH7OZKTk/nPxt6M77zzzj3PxfZlsDcKa8OWT9m+nrb2RY7XoaCggIeHn58ff93ZPiMWCmVG+1Bs6XVoC5y+AAAkgxoKAEgGgQIAkkGgAIBkECgAIBkECgBIBoECAJJBoACAZBAoACAZBAoASAaBAgCSQaAAgGQQKABAUvl/CnR3nTg3P6IAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "url = \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/example-crs/files/example-crs_vermont-utm.fgb\"\n", + "sd.read_pyogrio(url).to_pandas().plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/flatgeobuf.md b/docs/flatgeobuf.md new file mode 100644 index 00000000..82a02844 --- /dev/null +++ b/docs/flatgeobuf.md @@ -0,0 +1,90 @@ + + +# SedonaDB + FlatGeobuf + +This page explains how to read FlatGeobuf files with SedonaDB. + +FlatGeobuf is a cloud-optimized binary format for geographic vector data designed for fast streaming and spatial filtering over HTTP. + +It has a built-in spatial index, is easily compactible, contains CRS information, and is supported by many engines. + +The examples on this page show you how to query FlatGeobuf files with SedonaDB over HTTP. + + +```python +import sedona.db + +sd = sedona.db.connect() +``` + +# Read Microsoft Buildings FlatGeobuf data with SedonaDB + +The Microsoft buildings dataset is a comprehensive open dataset of building footprints extracted from satellite imagery using computer vision and deep learning. + +Here's how to read the Microsoft buildings dataset into a SedonaDB DataFrame and print a few rows. 
+ + +```python +url = "https://github.com/geoarrow/geoarrow-data/releases/download/v0.2.0/microsoft-buildings_point.fgb.zip" +df = sd.read_pyogrio(url) +df.show(3) +``` + + ┌─────────────────────────────────┐ + │ wkb_geometry │ + │ geometry │ + ╞═════════════════════════════════╡ + │ POINT(-97.16154292 26.08759861) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ POINT(-97.1606625 26.08481) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ POINT(-97.16133375 26.08519809) │ + └─────────────────────────────────┘ + + +You can see that the Microsoft Buildings dataset contains the building centroids. + +Take a look at the schema and see how it contains the `wkb_geometry` column and the CRS. + + +```python +df.schema +``` + + + + + SedonaSchema with 1 field: + wkb_geometry: geometry + + + +Now let's see how to read another FlatGeobuf dataset. + +# Read Vermont boundary FlatGeobuf data with SedonaDB + +The Vermont boundary dataset contains the polygon for the state of Vermont. + +The following example shows how to read the Vermont FlatGeobuf dataset and plot it. 
+ +```python +url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/example-crs/files/example-crs_vermont-utm.fgb" +sd.read_pyogrio(url).to_pandas().plot() +``` diff --git a/mkdocs.yml b/mkdocs.yml index f169c866..0cf91486 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -51,6 +51,7 @@ nav: - CRS Examples: crs-examples.md - Delta Lake: delta-lake.md - Iceberg: iceberg.md + - FlatGeobuf: flatgeobuf.md - Working with Parquet Files: working-with-parquet-files.md - Working with SQL in SedonaDB: working-with-sql-sedonadb.md - Contributors Guide: contributors-guide.md From 58748a32b20b6fdac56583671eb7452b92cc5b6f Mon Sep 17 00:00:00 2001 From: Matthew Powers Date: Mon, 15 Dec 2025 13:18:31 -0500 Subject: [PATCH 2/2] address pull request comments --- docs/flatgeobuf.ipynb | 64 ++++++++++++++++++++++++++++++++++--- docs/flatgeobuf.md | 73 +++++++++++++++++++++++++++++-------------- 2 files changed, 109 insertions(+), 28 deletions(-) diff --git a/docs/flatgeobuf.ipynb b/docs/flatgeobuf.ipynb index ae59bccd..3be1e612 100644 --- a/docs/flatgeobuf.ipynb +++ b/docs/flatgeobuf.ipynb @@ -13,16 +13,16 @@ "\n", "This page explains how to read FlatGeobuf files with SedonaDB.\n", "\n", - "FlatGeobuf is a cloud-optimized binary format for geographic vector data designed for fast streaming and spatial filtering over HTTP.\n", + "FlatGeobuf is a cloud-optimized binary format for geographic vector data designed for fast streaming and spatial filtering over HTTP. It has a built-in spatial index, is easily compactible, contains CRS information, and is supported by many engines.\n", "\n", - "It has a built-in spatial index, is easily compactible, contains CRS information, and is supported by many engines.\n", + "SedonaDB is well-suited for reading FlatGeobuf files because it can leverage the FlatGeobuf index to read only a portion of the file.\n", "\n", "The examples on this page show you how to query FlatGeobuf files with SedonaDB over HTTP." 
] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "a746c47d", "metadata": { "vscode": { @@ -41,7 +41,7 @@ "id": "87c9bf67-cb6c-445c-8199-727bacbb412e", "metadata": {}, "source": [ - "# Read Microsoft Buildings FlatGeobuf data with SedonaDB\n", + "## Read Microsoft Buildings FlatGeobuf data with SedonaDB\n", "\n", "The Microsoft buildings dataset is a comprehensive open dataset of building footprints extracted from satellite imagery using computer vision and deep learning.\n", "\n", @@ -126,7 +126,7 @@ "id": "d30ab78a-3692-48ea-836c-ed31d497a5fd", "metadata": {}, "source": [ - "# Read Vermont boundary FlatGeobuf data with SedonaDB\n", + "## Read Vermont boundary FlatGeobuf data with SedonaDB\n", "\n", "The Vermont boundary dataset contains the polygon for the state of Vermont.\n", "\n", @@ -168,6 +168,60 @@ "url = \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/example-crs/files/example-crs_vermont-utm.fgb\"\n", "sd.read_pyogrio(url).to_pandas().plot()" ] + }, + { + "cell_type": "markdown", + "id": "23ec9af7-ec3b-45c8-a589-4d92d0cb9c02", + "metadata": {}, + "source": [ + "## Read a portion of a large remote FlatGeobuf file\n", + "\n", + "Now let's look at how to read a portion of a 12GB FlatGeobuf file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d887c499-a5d9-4f25-9875-851525b5c88d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌──────────────────────────────────┐\n", + "│ sum(population_areas.population) │\n", + "│ int64 │\n", + "╞══════════════════════════════════╡\n", + "│ 256251 │\n", + "└──────────────────────────────────┘\n", + "CPU times: user 16 ms, sys: 15.3 ms, total: 31.4 ms\n", + "Wall time: 493 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "url = \"https://flatgeobuf.septima.dk/population_areas.fgb\"\n", + "sd.read_pyogrio(url).to_view(\"population_areas\", True)\n", + "\n", + "wkt = \"POLYGON ((-73.978329 40.767412, -73.950005 40.767412, -73.950005 40.795098, -73.978329 40.795098, -73.978329 40.767412))\"\n", + "sd.sql(\n", + " f\"\"\"\n", + "SELECT sum(population::INTEGER) FROM population_areas\n", + "WHERE ST_Intersects(wkb_geometry, ST_SetSRID(ST_GeomFromWKT('{wkt}'), 4326))\n", + "\"\"\"\n", + ").show()" + ] + }, + { + "cell_type": "markdown", + "id": "ef6cf480-f4f5-4a9f-9f52-6370fc41af29", + "metadata": {}, + "source": [ + "SedonaDB can query the 12GB FlatGeobuf file in about half a second on a laptop for this area of interest." + ] + } ], "metadata": { diff --git a/docs/flatgeobuf.md b/docs/flatgeobuf.md index 82a02844..1943baa1 100644 --- a/docs/flatgeobuf.md +++ b/docs/flatgeobuf.md @@ -1,29 +1,10 @@ - - # SedonaDB + FlatGeobuf This page explains how to read FlatGeobuf files with SedonaDB. -FlatGeobuf is a cloud-optimized binary format for geographic vector data designed for fast streaming and spatial filtering over HTTP. +FlatGeobuf is a cloud-optimized binary format for geographic vector data designed for fast streaming and spatial filtering over HTTP. It has a built-in spatial index, is easily compactible, contains CRS information, and is supported by many engines. 
-It has a built-in spatial index, is easily compactible, contains CRS information, and is supported by many engines. +SedonaDB is well-suited for reading FlatGeobuf files because it can leverage the FlatGeobuf index to read only a portion of the file. The examples on this page show you how to query FlatGeobuf files with SedonaDB over HTTP. @@ -34,7 +15,7 @@ import sedona.db sd = sedona.db.connect() ``` -# Read Microsoft Buildings FlatGeobuf data with SedonaDB +## Read Microsoft Buildings FlatGeobuf data with SedonaDB The Microsoft buildings dataset is a comprehensive open dataset of building footprints extracted from satellite imagery using computer vision and deep learning. @@ -78,13 +59,59 @@ df.schema Now let's see how to read another FlatGeobuf dataset. -# Read Vermont boundary FlatGeobuf data with SedonaDB +## Read Vermont boundary FlatGeobuf data with SedonaDB The Vermont boundary dataset contains the polygon for the state of Vermont. The following example shows how to read the Vermont FlatGeobuf dataset and plot it. + ```python url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/example-crs/files/example-crs_vermont-utm.fgb" sd.read_pyogrio(url).to_pandas().plot() ``` + + + + + + + + + + +![png](flatgeobuf_files/flatgeobuf_8_1.png) + + + +## Read a portion of a large remote FlatGeobuf file + +Now let's look at how to read a portion of a 12GB FlatGeobuf file. 
+ + +```python +%%time + +url = "https://flatgeobuf.septima.dk/population_areas.fgb" +sd.read_pyogrio(url).to_view("population_areas", True) + +wkt = "POLYGON ((-73.978329 40.767412, -73.950005 40.767412, -73.950005 40.795098, -73.978329 40.795098, -73.978329 40.767412))" +sd.sql( + f""" +SELECT sum(population::INTEGER) FROM population_areas +WHERE ST_Intersects(wkb_geometry, ST_SetSRID(ST_GeomFromWKT('{wkt}'), 4326)) +""" +).show() +``` + + ┌──────────────────────────────────┐ + │ sum(population_areas.population) │ + │ int64 │ + ╞══════════════════════════════════╡ + │ 256251 │ + └──────────────────────────────────┘ + CPU times: user 16 ms, sys: 15.3 ms, total: 31.4 ms + Wall time: 493 ms + + +SedonaDB can query the 12GB FlatGeobuf file in about half a second on a laptop for this area of interest.