class Aws::SageMaker::Types::CreateProcessingJobRequest

@note When making an API call, you may pass CreateProcessingJobRequest data as a hash:

    {
      processing_inputs: [
        {
          input_name: "String", # required
          app_managed: false,
          s3_input: {
            s3_uri: "S3Uri", # required
            local_path: "ProcessingLocalPath",
            s3_data_type: "ManifestFile", # required, accepts ManifestFile, S3Prefix
            s3_input_mode: "Pipe", # accepts Pipe, File
            s3_data_distribution_type: "FullyReplicated", # accepts FullyReplicated, ShardedByS3Key
            s3_compression_type: "None", # accepts None, Gzip
          },
          dataset_definition: {
            athena_dataset_definition: {
              catalog: "AthenaCatalog", # required
              database: "AthenaDatabase", # required
              query_string: "AthenaQueryString", # required
              work_group: "AthenaWorkGroup",
              output_s3_uri: "S3Uri", # required
              kms_key_id: "KmsKeyId",
              output_format: "PARQUET", # required, accepts PARQUET, ORC, AVRO, JSON, TEXTFILE
              output_compression: "GZIP", # accepts GZIP, SNAPPY, ZLIB
            },
            redshift_dataset_definition: {
              cluster_id: "RedshiftClusterId", # required
              database: "RedshiftDatabase", # required
              db_user: "RedshiftUserName", # required
              query_string: "RedshiftQueryString", # required
              cluster_role_arn: "RoleArn", # required
              output_s3_uri: "S3Uri", # required
              kms_key_id: "KmsKeyId",
              output_format: "PARQUET", # required, accepts PARQUET, CSV
              output_compression: "None", # accepts None, GZIP, BZIP2, ZSTD, SNAPPY
            },
            local_path: "ProcessingLocalPath",
            data_distribution_type: "FullyReplicated", # accepts FullyReplicated, ShardedByS3Key
            input_mode: "Pipe", # accepts Pipe, File
          },
        },
      ],
      processing_output_config: {
        outputs: [ # required
          {
            output_name: "String", # required
            s3_output: {
              s3_uri: "S3Uri", # required
              local_path: "ProcessingLocalPath", # required
              s3_upload_mode: "Continuous", # required, accepts Continuous, EndOfJob
            },
            feature_store_output: {
              feature_group_name: "FeatureGroupName", # required
            },
            app_managed: false,
          },
        ],
        kms_key_id: "KmsKeyId",
      },
      processing_job_name: "ProcessingJobName", # required
      processing_resources: { # required
        cluster_config: { # required
          instance_count: 1, # required
          instance_type: "ml.t3.medium", # required, accepts ml.t3.medium, ml.t3.large, ml.t3.xlarge, ml.t3.2xlarge, ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.r5.large, ml.r5.xlarge, ml.r5.2xlarge, ml.r5.4xlarge, ml.r5.8xlarge, ml.r5.12xlarge, ml.r5.16xlarge, ml.r5.24xlarge, ml.g4dn.xlarge, ml.g4dn.2xlarge, ml.g4dn.4xlarge, ml.g4dn.8xlarge, ml.g4dn.12xlarge, ml.g4dn.16xlarge
          volume_size_in_gb: 1, # required
          volume_kms_key_id: "KmsKeyId",
        },
      },
      stopping_condition: {
        max_runtime_in_seconds: 1, # required
      },
      app_specification: { # required
        image_uri: "ImageUri", # required
        container_entrypoint: ["ContainerEntrypointString"],
        container_arguments: ["ContainerArgument"],
      },
      environment: {
        "ProcessingEnvironmentKey" => "ProcessingEnvironmentValue",
      },
      network_config: {
        enable_inter_container_traffic_encryption: false,
        enable_network_isolation: false,
        vpc_config: {
          security_group_ids: ["SecurityGroupId"], # required
          subnets: ["SubnetId"], # required
        },
      },
      role_arn: "RoleArn", # required
      tags: [
        {
          key: "TagKey", # required
          value: "TagValue", # required
        },
      ],
      experiment_config: {
        experiment_name: "ExperimentEntityName",
        trial_name: "ExperimentEntityName",
        trial_component_display_name: "ExperimentEntityName",
      },
    }

@!attribute [rw] processing_inputs

An array of inputs configuring the data to download into the
processing container.
@return [Array<Types::ProcessingInput>]

@!attribute [rw] processing_output_config

Output configuration for the processing job.
@return [Types::ProcessingOutputConfig]

@!attribute [rw] processing_job_name

The name of the processing job. The name must be unique within an
Amazon Web Services Region in the Amazon Web Services account.
@return [String]

@!attribute [rw] processing_resources

Identifies the resources, ML compute instances, and ML storage
volumes to deploy for a processing job. In distributed training, you
specify more than one instance.
@return [Types::ProcessingResources]

@!attribute [rw] stopping_condition

The time limit for how long the processing job is allowed to run.
@return [Types::ProcessingStoppingCondition]

@!attribute [rw] app_specification

Configures the processing job to run a specified Docker container
image.
@return [Types::AppSpecification]

@!attribute [rw] environment

The environment variables to set in the Docker container. Up to 100
key-value entries in the map are supported.
@return [Hash<String,String>]

@!attribute [rw] network_config

Networking options for a processing job, such as whether to allow
inbound and outbound network calls to and from processing
containers, and the VPC subnets and security groups to use for
VPC-enabled processing jobs.
@return [Types::NetworkConfig]

@!attribute [rw] role_arn

The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker
can assume to perform tasks on your behalf.
@return [String]

@!attribute [rw] tags

(Optional) An array of key-value pairs. For more information, see
[Using Cost Allocation Tags][1] in the *Amazon Web Services Billing
and Cost Management User Guide*.

[1]: https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/cost-alloc-tags.html#allocation-whatURL
@return [Array<Types::Tag>]

@!attribute [rw] experiment_config

Associates a SageMaker job as a trial component with an experiment
and trial. Specified when you call the following APIs:

* CreateProcessingJob

* CreateTrainingJob

* CreateTransformJob
@return [Types::ExperimentConfig]

@see https://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/CreateProcessingJobRequest AWS API Documentation

Constants

SENSITIVE