Skip to content

Commit f16f1c4

Browse files
feat(storage): install iceberg-js and add from method (#1881)
Co-authored-by: depthfirst-app[bot] <184448029+depthfirst-app[bot]@users.noreply.github.com>
1 parent 3461864 commit f16f1c4

File tree

7 files changed

+599
-2
lines changed

7 files changed

+599
-2
lines changed

package-lock.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/core/storage-js/README.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,70 @@ if (error) {
488488

489489
> **Note:** A bucket cannot be deleted if it contains data. You must empty the bucket first.
490490
491+
#### Get Iceberg Catalog for Advanced Operations
492+
493+
For advanced operations like creating tables, namespaces, and querying Iceberg metadata, use the `from()` method to get a configured [iceberg-js](https://github.com/supabase/iceberg-js) client:
494+
495+
```typescript
496+
// Get an Iceberg REST Catalog client for your analytics bucket
497+
const catalog = analytics.from('analytics-data')
498+
499+
// Create a namespace
500+
await catalog.createNamespace({ namespace: ['default'] }, { properties: { owner: 'data-team' } })
501+
502+
// Create a table with schema
503+
await catalog.createTable(
504+
{ namespace: ['default'] },
505+
{
506+
name: 'events',
507+
schema: {
508+
type: 'struct',
509+
fields: [
510+
{ id: 1, name: 'id', type: 'long', required: true },
511+
{ id: 2, name: 'timestamp', type: 'timestamp', required: true },
512+
{ id: 3, name: 'user_id', type: 'string', required: false },
513+
],
514+
'schema-id': 0,
515+
'identifier-field-ids': [1],
516+
},
517+
'partition-spec': {
518+
'spec-id': 0,
519+
fields: [],
520+
},
521+
'write-order': {
522+
'order-id': 0,
523+
fields: [],
524+
},
525+
properties: {
526+
'write.format.default': 'parquet',
527+
},
528+
}
529+
)
530+
531+
// List tables in namespace
532+
const tables = await catalog.listTables({ namespace: ['default'] })
533+
console.log(tables) // [{ namespace: ['default'], name: 'events' }]
534+
535+
// Load table metadata
536+
const table = await catalog.loadTable({ namespace: ['default'], name: 'events' })
537+
538+
// Update table properties
539+
await catalog.updateTable(
540+
{ namespace: ['default'], name: 'events' },
541+
{ properties: { 'read.split.target-size': '134217728' } }
542+
)
543+
544+
// Drop table
545+
await catalog.dropTable({ namespace: ['default'], name: 'events' })
546+
547+
// Drop namespace
548+
await catalog.dropNamespace({ namespace: ['default'] })
549+
```
550+
551+
**Returns:** `IcebergRestCatalog` instance from [iceberg-js](https://github.com/supabase/iceberg-js)
552+
553+
> **Note:** The `from()` method returns an Iceberg REST Catalog client that provides full access to the Apache Iceberg REST API. For complete documentation of available operations, see the [iceberg-js documentation](https://supabase.github.io/iceberg-js/).
554+
491555
### Error Handling
492556

493557
Analytics buckets use the same error handling pattern as the rest of the Storage SDK:

packages/core/storage-js/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"docs:json": "typedoc --json docs/v2/spec.json --entryPoints src/index.ts --entryPoints src/packages/* --excludePrivate --excludeExternals --excludeProtected"
3838
},
3939
"dependencies": {
40+
"iceberg-js": "^0.8.0",
4041
"tslib": "2.8.1"
4142
},
4243
"devDependencies": {

packages/core/storage-js/src/lib/helpers.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,47 @@ export const isPlainObject = (value: object): boolean => {
4646
!(Symbol.iterator in value)
4747
)
4848
}
49+
50+
/**
 * Checks whether a bucket name is acceptable according to Supabase Storage API rules.
 * Mirrors backend validation from: storage/src/storage/limits.ts:isValidBucketName()
 *
 * Rules:
 * - Must be a string of 1-100 characters
 * - Allowed characters: alphanumeric (a-z, A-Z, 0-9), underscore (_), and safe special characters
 * - Safe special characters: ! - . * ' ( ) space & $ @ = ; : + , ?
 * - Forbidden: path separators (/, \) and leading/trailing whitespace
 * - Consecutive periods (..) are allowed: the AWS restriction on relative paths
 *   applies to object keys, not bucket names
 *
 * AWS S3 Reference: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
 *
 * @param bucketName - The bucket name to validate
 * @returns true if valid, false otherwise (including for non-string or empty input)
 */
export const isValidBucketName = (bucketName: string): boolean => {
  // Callers from plain JavaScript may pass anything, so guard the runtime type first.
  if (typeof bucketName !== 'string') {
    return false
  }

  // Length constraint: 1-100 characters (this also rejects the empty string)
  if (bucketName.length === 0 || bucketName.length > 100) {
    return false
  }

  // Leading/trailing whitespace is not allowed
  if (bucketName.trim() !== bucketName) {
    return false
  }

  // Explicitly reject path separators (security). The character class below
  // excludes them as well; this check keeps the intent obvious.
  if (bucketName.includes('/') || bucketName.includes('\\')) {
    return false
  }

  // Allowed character set.
  // Pattern matches backend regex: /^(\w|!|-|\.|\*|'|\(|\)| |&|\$|@|=|;|:|\+|,|\?)*$/
  // The trailing "-" is literal because it is the last character in the class.
  const bucketNameRegex = /^[\w!.*'() &$@=;:+,?-]+$/
  return bucketNameRegex.test(bucketName)
}

packages/core/storage-js/src/packages/StorageAnalyticsClient.ts

Lines changed: 161 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
import { IcebergRestCatalog } from 'iceberg-js'
12
import { DEFAULT_HEADERS } from '../lib/constants'
23
import { isStorageError, StorageError } from '../lib/errors'
34
import { Fetch, get, post, remove } from '../lib/fetch'
4-
import { resolveFetch } from '../lib/helpers'
5+
import { isValidBucketName, resolveFetch } from '../lib/helpers'
56
import { AnalyticBucket } from '../lib/types'
67

78
/**
@@ -261,4 +262,163 @@ export default class StorageAnalyticsClient {
261262
throw error
262263
}
263264
}
265+
266+
/**
267+
* @alpha
268+
*
269+
* Get an Iceberg REST Catalog client configured for a specific analytics bucket
270+
* Use this to perform advanced table and namespace operations within the bucket
271+
* The returned client provides full access to the Apache Iceberg REST Catalog API
272+
*
273+
* **Public alpha:** This API is part of a public alpha release and may not be available to your account type.
274+
*
275+
* @category Analytics Buckets
276+
* @param bucketName - The name of the analytics bucket (warehouse) to connect to
277+
* @returns Configured IcebergRestCatalog instance for advanced Iceberg operations
278+
*
279+
* @example Get catalog and create table
280+
* ```js
281+
* // First, create an analytics bucket
282+
* const { data: bucket, error: bucketError } = await supabase
283+
* .storage
284+
* .analytics
285+
* .createBucket('analytics-data')
286+
*
287+
* // Get the Iceberg catalog for that bucket
288+
* const catalog = supabase.storage.analytics.from('analytics-data')
289+
*
290+
* // Create a namespace
291+
* await catalog.createNamespace({ namespace: ['default'] })
292+
*
293+
* // Create a table with schema
294+
* await catalog.createTable(
295+
* { namespace: ['default'] },
296+
* {
297+
* name: 'events',
298+
* schema: {
299+
* type: 'struct',
300+
* fields: [
301+
* { id: 1, name: 'id', type: 'long', required: true },
302+
* { id: 2, name: 'timestamp', type: 'timestamp', required: true },
303+
* { id: 3, name: 'user_id', type: 'string', required: false }
304+
* ],
305+
* 'schema-id': 0,
306+
* 'identifier-field-ids': [1]
307+
* },
308+
* 'partition-spec': {
309+
* 'spec-id': 0,
310+
* fields: []
311+
* },
312+
* 'write-order': {
313+
* 'order-id': 0,
314+
* fields: []
315+
* },
316+
* properties: {
317+
* 'write.format.default': 'parquet'
318+
* }
319+
* }
320+
* )
321+
* ```
322+
*
323+
* @example List tables in namespace
324+
* ```js
325+
* const catalog = supabase.storage.analytics.from('analytics-data')
326+
*
327+
* // List all tables in the default namespace
328+
* const tables = await catalog.listTables({ namespace: ['default'] })
329+
* console.log(tables) // [{ namespace: ['default'], name: 'events' }]
330+
* ```
331+
*
332+
* @example Working with namespaces
333+
* ```js
334+
* const catalog = supabase.storage.analytics.from('analytics-data')
335+
*
336+
* // List all namespaces
337+
* const namespaces = await catalog.listNamespaces()
338+
*
339+
* // Create namespace with properties
340+
* await catalog.createNamespace(
341+
* { namespace: ['production'] },
342+
* { properties: { owner: 'data-team', env: 'prod' } }
343+
* )
344+
* ```
345+
*
346+
* @example Cleanup operations
347+
* ```js
348+
* const catalog = supabase.storage.analytics.from('analytics-data')
349+
*
350+
* // Drop table with purge option (removes all data)
351+
* await catalog.dropTable(
352+
* { namespace: ['default'], name: 'events' },
353+
* { purge: true }
354+
* )
355+
*
356+
* // Drop namespace (must be empty)
357+
* await catalog.dropNamespace({ namespace: ['default'] })
358+
* ```
359+
*
360+
* @example Error handling with catalog operations
361+
* ```js
362+
* import { IcebergError } from 'iceberg-js'
363+
*
364+
* const catalog = supabase.storage.analytics.from('analytics-data')
365+
*
366+
* try {
367+
* await catalog.dropTable({ namespace: ['default'], name: 'events' }, { purge: true })
368+
* } catch (error) {
369+
* // Handle 404 errors (resource not found)
370+
* const is404 =
371+
* (error instanceof IcebergError && error.status === 404) ||
372+
* error?.status === 404 ||
373+
* error?.details?.error?.code === 404
374+
*
375+
* if (is404) {
376+
* console.log('Table does not exist')
377+
* } else {
378+
* throw error // Re-throw other errors
379+
* }
380+
* }
381+
* ```
382+
*
383+
* @remarks
384+
* This method provides a bridge between Supabase's bucket management and the standard
385+
* Apache Iceberg REST Catalog API. The bucket name maps to the Iceberg warehouse parameter.
386+
* All authentication and configuration is handled automatically using your Supabase credentials.
387+
*
388+
* **Error Handling**: Operations may throw `IcebergError` from the iceberg-js library.
389+
* Always handle 404 errors gracefully when checking for resource existence.
390+
*
391+
* **Cleanup Operations**: When using `dropTable`, the `purge: true` option permanently
392+
* deletes all table data. Without it, the table is marked as deleted but data remains.
393+
*
394+
* **Library Dependency**: The returned catalog is an instance of `IcebergRestCatalog`
395+
* from iceberg-js. For complete API documentation and advanced usage, refer to the
396+
* [iceberg-js documentation](https://supabase.github.io/iceberg-js/).
397+
*
398+
* For advanced Iceberg operations beyond bucket management, you can also install and use
399+
* the `iceberg-js` package directly with manual configuration.
400+
*/
401+
from(bucketName: string): IcebergRestCatalog {
  // Fail fast on names that do not pass the shared bucket-name validation,
  // before any catalog client is constructed.
  const nameAccepted = isValidBucketName(bucketName)
  if (!nameAccepted) {
    throw new StorageError(
      'Invalid bucket name: File, folder, and bucket names must follow AWS object key naming guidelines ' +
        'and should avoid the use of any other characters.'
    )
  }

  // Evaluated each time the catalog asks for headers, so it returns whatever
  // this.headers holds at that moment rather than a snapshot taken here.
  const resolveHeaders = async () => this.headers

  // The client URL (/storage/v1/iceberg) is passed through unchanged.
  // Note: IcebergRestCatalog from iceberg-js automatically adds the /v1/ prefix to API
  // paths, so /v1 must NOT be appended here (it would cause /v1/v1/ in the URL).
  return new IcebergRestCatalog({
    baseUrl: this.url,
    // Maps to the warehouse parameter in Supabase's implementation
    catalogName: bucketName,
    auth: {
      type: 'custom',
      getHeaders: resolveHeaders,
    },
    fetch: this.fetch,
  })
}
264424
}

0 commit comments

Comments
 (0)