class Aws::SageMaker::Types::CreateProcessingJobRequest
@note When making an API call, you may pass CreateProcessingJobRequest
data as a hash:

    {
      processing_inputs: [
        {
          input_name: "String", # required
          app_managed: false,
          s3_input: {
            s3_uri: "S3Uri", # required
            local_path: "ProcessingLocalPath",
            s3_data_type: "ManifestFile", # required, accepts ManifestFile, S3Prefix
            s3_input_mode: "Pipe", # accepts Pipe, File
            s3_data_distribution_type: "FullyReplicated", # accepts FullyReplicated, ShardedByS3Key
            s3_compression_type: "None", # accepts None, Gzip
          },
          dataset_definition: {
            athena_dataset_definition: {
              catalog: "AthenaCatalog", # required
              database: "AthenaDatabase", # required
              query_string: "AthenaQueryString", # required
              work_group: "AthenaWorkGroup",
              output_s3_uri: "S3Uri", # required
              kms_key_id: "KmsKeyId",
              output_format: "PARQUET", # required, accepts PARQUET, ORC, AVRO, JSON, TEXTFILE
              output_compression: "GZIP", # accepts GZIP, SNAPPY, ZLIB
            },
            redshift_dataset_definition: {
              cluster_id: "RedshiftClusterId", # required
              database: "RedshiftDatabase", # required
              db_user: "RedshiftUserName", # required
              query_string: "RedshiftQueryString", # required
              cluster_role_arn: "RoleArn", # required
              output_s3_uri: "S3Uri", # required
              kms_key_id: "KmsKeyId",
              output_format: "PARQUET", # required, accepts PARQUET, CSV
              output_compression: "None", # accepts None, GZIP, BZIP2, ZSTD, SNAPPY
            },
            local_path: "ProcessingLocalPath",
            data_distribution_type: "FullyReplicated", # accepts FullyReplicated, ShardedByS3Key
            input_mode: "Pipe", # accepts Pipe, File
          },
        },
      ],
      processing_output_config: {
        outputs: [ # required
          {
            output_name: "String", # required
            s3_output: {
              s3_uri: "S3Uri", # required
              local_path: "ProcessingLocalPath", # required
              s3_upload_mode: "Continuous", # required, accepts Continuous, EndOfJob
            },
            feature_store_output: {
              feature_group_name: "FeatureGroupName", # required
            },
            app_managed: false,
          },
        ],
        kms_key_id: "KmsKeyId",
      },
      processing_job_name: "ProcessingJobName", # required
      processing_resources: { # required
        cluster_config: { # required
          instance_count: 1, # required
          instance_type: "ml.t3.medium", # required, accepts ml.t3.medium, ml.t3.large, ml.t3.xlarge, ml.t3.2xlarge, ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.r5.large, ml.r5.xlarge, ml.r5.2xlarge, ml.r5.4xlarge, ml.r5.8xlarge, ml.r5.12xlarge, ml.r5.16xlarge, ml.r5.24xlarge, ml.g4dn.xlarge, ml.g4dn.2xlarge, ml.g4dn.4xlarge, ml.g4dn.8xlarge, ml.g4dn.12xlarge, ml.g4dn.16xlarge
          volume_size_in_gb: 1, # required
          volume_kms_key_id: "KmsKeyId",
        },
      },
      stopping_condition: {
        max_runtime_in_seconds: 1, # required
      },
      app_specification: { # required
        image_uri: "ImageUri", # required
        container_entrypoint: ["ContainerEntrypointString"],
        container_arguments: ["ContainerArgument"],
      },
      environment: {
        "ProcessingEnvironmentKey" => "ProcessingEnvironmentValue",
      },
      network_config: {
        enable_inter_container_traffic_encryption: false,
        enable_network_isolation: false,
        vpc_config: {
          security_group_ids: ["SecurityGroupId"], # required
          subnets: ["SubnetId"], # required
        },
      },
      role_arn: "RoleArn", # required
      tags: [
        {
          key: "TagKey", # required
          value: "TagValue", # required
        },
      ],
      experiment_config: {
        experiment_name: "ExperimentEntityName",
        trial_name: "ExperimentEntityName",
        trial_component_display_name: "ExperimentEntityName",
      },
    }
@!attribute [rw] processing_inputs
An array of inputs configuring the data to download into the processing container. @return [Array<Types::ProcessingInput>]
@!attribute [rw] processing_output_config
Output configuration for the processing job. @return [Types::ProcessingOutputConfig]
@!attribute [rw] processing_job_name
The name of the processing job. The name must be unique within an Amazon Web Services Region in the Amazon Web Services account. @return [String]
@!attribute [rw] processing_resources
Identifies the resources, ML compute instances, and ML storage volumes to deploy for a processing job. In distributed training, you specify more than one instance. @return [Types::ProcessingResources]
@!attribute [rw] stopping_condition
The time limit for how long the processing job is allowed to run. @return [Types::ProcessingStoppingCondition]
@!attribute [rw] app_specification
Configures the processing job to run a specified Docker container image. @return [Types::AppSpecification]
@!attribute [rw] environment
The environment variables to set in the Docker container. Up to 100 key-value entries in the map are supported. @return [Hash<String,String>]
@!attribute [rw] network_config
Networking options for a processing job, such as whether to allow inbound and outbound network calls to and from processing containers, and the VPC subnets and security groups to use for VPC-enabled processing jobs. @return [Types::NetworkConfig]
@!attribute [rw] role_arn
The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can assume to perform tasks on your behalf. @return [String]
@!attribute [rw] tags
(Optional) An array of key-value pairs. For more information, see [Using Cost Allocation Tags](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/cost-alloc-tags.html#allocation-whatURL) in the *Amazon Web Services Billing and Cost Management User Guide*. @return [Array<Types::Tag>]
@!attribute [rw] experiment_config
Associates a SageMaker job as a trial component with an experiment and trial. Specified when you call the following APIs:

* CreateProcessingJob
* CreateTrainingJob
* CreateTransformJob

@return [Types::ExperimentConfig]
@see https://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/CreateProcessingJobRequest AWS API Documentation
Constants
- SENSITIVE