Skip to content

Commit 91cb0f7

Browse files
authored
feat(dataset): importer (#335)
* feat(dataset): importer * fix: addressed code review comments * fixup * set clip help colors version color * update package versions * update package versions * update package versions * update package versions * addressed code review comments * reset piplock file * Revert "reset piplock file" This reverts commit 6cc0f80. * review comments * review comments * fix * fix address * fix code review comments * revert lock file * revert lock file * chore: fix spacing * change click help colors version * change click help colors version * setting specific version * pls * setup.py click number * reset pipfile * fix equals signs * fix(dataset): importer command wrapped in JobCommand to make use of job create experience * fix: pip lock file * remove toml file * fix: http basic auth * base64 basic auth commands * error handling
1 parent 5949823 commit 91cb0f7

File tree

7 files changed

+204
-4
lines changed

7 files changed

+204
-4
lines changed

Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ progressbar2 = "*"
1111
cryptography = {extras = ["security"]}
1212
six = "*"
1313
gradient-statsd = "*"
14-
click = "*"
14+
click = "7.1.2"
1515
terminaltables = "*"
1616
click-didyoumean = "*"
1717
click-help-colors = "*"

gradient/api_sdk/clients/secret_client.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,19 @@ def delete(self, entity, entity_id, name):
5757

5858
repository = self.build_repository(repositories.DeleteSecret)
5959
repository.delete(entity=entity, entity_id=entity_id, name=name)
60+
61+
def ephemeral(self, key, value, expires_in):
62+
"""Create ephemeral secret.
63+
64+
:param str key: secret key
65+
:param str value: secret value
66+
:param str expires_in: seconds secrets expire in
67+
68+
:returns:
69+
:rtype: str
70+
"""
71+
72+
repository = self.build_repository(repositories.EphemeralSecret)
73+
ephemeral_secret = repository.create(key=key, value=value, expires_in=expires_in)
74+
return ephemeral_secret
75+

gradient/api_sdk/repositories/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from .notebooks import CreateNotebook, DeleteNotebook, GetNotebook, ListNotebooks, GetNotebookMetrics, ListNotebookMetrics, \
2020
StreamNotebookMetrics, StopNotebook, StartNotebook, ForkNotebook, ListNotebookArtifacts, ListNotebookLogs
2121
from .projects import CreateProject, ListProjects, DeleteProject, GetProject
22-
from .secrets import ListSecrets, SetSecret, DeleteSecret
22+
from .secrets import ListSecrets, SetSecret, DeleteSecret, EphemeralSecret
2323
from .storage_providers import ListStorageProviders, CreateStorageProvider, DeleteStorageProvider, \
2424
GetStorageProvider, UpdateStorageProvider
2525
from .tensorboards import CreateTensorboard, GetTensorboard, ListTensorboards, UpdateTensorboard, DeleteTensorboard

gradient/api_sdk/repositories/secrets.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,20 @@ def _send_request(self, client, url, json=None, params=None):
5252

5353
def delete(self, **kwargs):
5454
response = self._get(**kwargs)
55-
self._validate_response(response)
55+
self._validate_response(response)
56+
57+
class EphemeralSecret(SecretsMixin, BaseRepository):
58+
def get_request_url(self, **kwargs):
59+
return "/secrets/ephemeral?expiresIn={}".format(kwargs.get("expires_in"))
60+
61+
def _get_request_json(self, kwargs):
62+
return { kwargs.get("key"): kwargs.get("value") }
63+
64+
def _send_request(self, client, url, json=None, params=None):
65+
response = client.post(url, json=json)
66+
return response
67+
68+
def create(self, **kwargs):
69+
response = self._get(**kwargs)
70+
self._validate_response(response)
71+
return response.data

gradient/cli/datasets.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44
from gradient.cli.cli import cli
55
from gradient.cli.common import ClickGroup, api_key_option
66
from gradient.commands import datasets as commands
7+
from gradient.cli import common
8+
from gradient.cli.jobs import get_workspace_handler
9+
from gradient.cli.common import (
10+
api_key_option, del_if_value_is_none, ClickGroup, jsonify_dicts,
11+
validate_comma_split_option,
12+
)
713

814
EXAMPLE_ID = 'dsr8k5qzn401lb5'
915
EXAMPLE_VERSION = 'klfoyy9'
@@ -143,6 +149,65 @@ def update_dataset(
143149
description=description,
144150
)
145151

152+
@datasets.command("import", help="Import dataset")
153+
@click.option(
154+
"--clusterId",
155+
"cluster_id",
156+
help="Cluster ID",
157+
cls=common.GradientOption,
158+
required=True,
159+
)
160+
@click.option(
161+
"--machineType",
162+
"machine_type",
163+
help="Virtual machine type",
164+
cls=common.GradientOption,
165+
required=True,
166+
)
167+
@click.option(
168+
"--datasetId",
169+
"dataset_id",
170+
help="Dataset ID",
171+
cls=common.GradientOption,
172+
required=True,
173+
)
174+
@click.option(
175+
"--s3Url",
176+
"s3_url",
177+
help="S3 URL https://s3-us-east-1.amazonaws.com/bucket/path",
178+
cls=common.GradientOption,
179+
)
180+
@click.option(
181+
"--httpUrl",
182+
"http_url",
183+
help="HTTP/S URL https://data.something.org/all_my_data.zip}}",
184+
cls=common.GradientOption,
185+
)
186+
@click.option(
187+
"--httpAuth",
188+
"http_auth",
189+
help="Http Auth username:password",
190+
cls=common.GradientOption,
191+
)
192+
@click.option(
193+
"--s3AccessKey",
194+
"access_key",
195+
help="S3 access key",
196+
cls=common.GradientOption,
197+
)
198+
@click.option(
199+
"--s3SecretKey",
200+
"secret_key",
201+
help="S3 secret key",
202+
cls=common.GradientOption,
203+
)
204+
@api_key_option
205+
@common.options_file
206+
def import_dataset(cluster_id, machine_type, dataset_id, s3_url, http_url, http_auth, access_key, secret_key, api_key, options_file):
207+
validate_dataset_id(dataset_id)
208+
209+
command = commands.ImportDatasetCommand(api_key=api_key, workspace_handler=get_workspace_handler())
210+
command.execute(cluster_id, machine_type, dataset_id, s3_url, http_url, http_auth, access_key, secret_key)
146211

147212
@datasets.command("delete", help="Delete dataset")
148213
@click.option(

gradient/commands/datasets.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,37 @@
55
import re
66
import threading
77
import uuid
8+
import json
89
try:
910
import queue
1011
except ImportError:
1112
import Queue as queue
1213
from xml.etree import ElementTree
14+
from urllib.parse import urlparse
1315

1416
import halo
1517
import requests
1618
import six
1719

1820
from gradient import api_sdk
1921
from gradient.api_sdk.sdk_exceptions import ResourceFetchingError
22+
from gradient.api_sdk.utils import base64_encode
2023
from gradient.cli_constants import CLI_PS_CLIENT_NAME
24+
from gradient.cli.jobs import get_workspace_handler
25+
from gradient.commands import jobs as jobs_commands
2126
from gradient.commands.common import BaseCommand, DetailsCommandMixin, ListCommandPagerMixin
27+
from gradient.commands.jobs import BaseCreateJobCommandMixin, BaseJobCommand, CreateJobCommand
2228
from gradient.exceptions import ApplicationError
2329

2430
S3_XMLNS = 'http://s3.amazonaws.com/doc/2006-03-01/'
31+
DATASET_IMPORTER_IMAGE = "paperspace/dataset-importer:latest"
32+
PROJECT_NAME = "Job Builder"
33+
SUPPORTED_URL = ['https', 'http']
34+
IMPORTER_COMMAND = "go-getter"
35+
HTTP_SECRET = "HTTP_AUTH"
36+
S3_ACCESS_KEY = "AWS_ACCESS_KEY_ID"
37+
S3_SECRET_KEY = "AWS_SECRET_ACCESS_KEY"
38+
S3_REGION_KEY = "AWS_DEFAULT_REGION"
2539

2640

2741
class WorkerPool(object):
@@ -676,3 +690,92 @@ def update_status():
676690
for pre_signed in pre_signeds:
677691
update_status()
678692
pool.put(self._delete, url=pre_signed.url)
693+
694+
695+
class ImportDatasetCommand(BaseCreateJobCommandMixin, BaseJobCommand):
696+
def create_secret(self, key, value, expires_in=86400):
697+
client = api_sdk.clients.SecretsClient(
698+
api_key=self.api_key,
699+
logger=self.logger,
700+
ps_client_name=CLI_PS_CLIENT_NAME,
701+
)
702+
703+
response = client.ephemeral(key, value, expires_in)
704+
return response
705+
706+
def get_command(self, s3_url, http_url, http_auth):
707+
command = "%s %s /data/output" % (IMPORTER_COMMAND, (s3_url or http_url))
708+
if s3_url:
709+
command = "%s s3::%s /data/output" % (IMPORTER_COMMAND, s3_url)
710+
711+
if http_url and http_auth is not None:
712+
url = urlparse(http_url)
713+
command_string = "%s https://${{HTTP_AUTH}}@%s /data/output" % (IMPORTER_COMMAND, url.path)
714+
command = base64_encode(command_string)
715+
716+
return command
717+
718+
def get_env_vars(self, s3_url, http_url, secrets):
719+
if s3_url is not None:
720+
if secrets[S3_ACCESS_KEY] is None or secrets[S3_SECRET_KEY] is None:
721+
self.logger.log('s3AccessKey and s3SecretKey required')
722+
return
723+
724+
access_key_secret = self.create_secret(S3_ACCESS_KEY, secrets[S3_ACCESS_KEY])
725+
secret_key_secret = self.create_secret(S3_SECRET_KEY, secrets[S3_SECRET_KEY])
726+
727+
access_key_value = "secret:ephemeral:%s" % access_key_secret[S3_ACCESS_KEY]
728+
secret_key_value = "secret:ephemeral:%s" % secret_key_secret[S3_SECRET_KEY]
729+
730+
return {
731+
S3_ACCESS_KEY: access_key_value,
732+
S3_SECRET_KEY: secret_key_value,
733+
}
734+
735+
if http_url and secrets[S3_ACCESS_KEY] is not None:
736+
http_auth_secret = self.create_secret(HTTP_SECRET, secrets[HTTP_SECRET])
737+
return {
738+
HTTP_SECRET: http_auth_secret
739+
}
740+
741+
return ""
742+
743+
def _create(self, workflow):
744+
client = api_sdk.clients.JobsClient(
745+
api_key=self.api_key,
746+
ps_client_name=CLI_PS_CLIENT_NAME,
747+
)
748+
return self.client.create(**workflow)
749+
750+
751+
def execute(self, cluster_id, machine_type, dataset_id, s3_url, http_url, http_auth, access_key, secret_key):
752+
if s3_url is None and http_url is None:
753+
self.logger.log('Error: --s3Url or --httpUrl required')
754+
return
755+
756+
workflow = {
757+
"cluster_id": cluster_id,
758+
"container": DATASET_IMPORTER_IMAGE,
759+
"machine_type": machine_type,
760+
"project": PROJECT_NAME,
761+
"datasets": [{ "id": dataset_id, "name": "output", "output": True }],
762+
"project_id": None
763+
}
764+
765+
dataset_url = s3_url or http_url
766+
767+
url = urlparse(dataset_url)
768+
if url.scheme not in SUPPORTED_URL:
769+
self.logger.log('Invalid URL format supported [{}] Format:{} URL:{}'.format(','.join(SUPPORTED_URL), url.scheme, dataset_url))
770+
return
771+
772+
command = self.get_command(s3_url, http_url, http_auth)
773+
if command:
774+
workflow["command"] = command
775+
776+
env_vars = self.get_env_vars(s3_url, http_url, { HTTP_SECRET: http_auth, S3_ACCESS_KEY: access_key, S3_SECRET_KEY: secret_key })
777+
if env_vars:
778+
workflow["env_vars"] = env_vars
779+
780+
command = CreateJobCommand(api_key=self.api_key, workspace_handler=get_workspace_handler())
781+
command.execute(workflow)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def run(self):
5656
install_requires=[
5757
'requests[security]',
5858
'six',
59-
'click>=7.0',
59+
'click==7.1.2',
6060
'terminaltables',
6161
'click-didyoumean',
6262
'click-help-colors',

0 commit comments

Comments
 (0)