1. Packages
  2. Packages
  3. AWS
  4. API Docs
  5. sagemaker
  6. TrainingJob
Viewing docs for AWS v7.28.0
published on Thursday, Apr 30, 2026 by Pulumi

    Manages an AWS SageMaker AI Training Job.

    Example Usage

    Basic Usage

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    // Declares a SageMaker training job named "example".
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
    // not declared in this snippet; they are assumed to be defined elsewhere in the program.
    const example = new aws.sagemaker.TrainingJob("example", {
        trainingJobName: "example",
        roleArn: exampleAwsIamRole.arn,
        algorithmSpecification: {
            trainingInputMode: "File",
            trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
        },
        outputDataConfig: {
            // pulumi.interpolate resolves the bucket name even when it is an Output<string>;
            // a plain template literal would stringify the Output wrapper instead of its value.
            s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
        },
        // One ml.m5.large instance with a 30 GB volume.
        resourceConfig: {
            instanceType: "ml.m5.large",
            instanceCount: 1,
            volumeSizeInGb: 30,
        },
        // Runtime cap of 3600 seconds (one hour).
        stoppingCondition: {
            maxRuntimeInSeconds: 3600,
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    # Declares a SageMaker training job named "example".
    # NOTE: example_aws_iam_role, example_aws_sagemaker_prebuilt_ecr_image and
    # example_aws_s3_bucket are not defined in this snippet; they are assumed to be
    # defined elsewhere in the program.
    example = aws.sagemaker.TrainingJob("example",
        training_job_name="example",
        role_arn=example_aws_iam_role["arn"],
        algorithm_specification={
            "training_input_mode": "File",
            "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
        },
        output_data_config={
            # Output.format resolves the bucket name even when it is a pulumi.Output;
            # an f-string would embed the Output object's string form instead of its value.
            "s3_output_path": pulumi.Output.format("s3://{0}/output/", example_aws_s3_bucket["bucket"]),
        },
        # One ml.m5.large instance with a 30 GB volume.
        resource_config={
            "instance_type": "ml.m5.large",
            "instance_count": 1,
            "volume_size_in_gb": 30,
        },
        # Runtime cap of 3600 seconds (one hour).
        stopping_condition={
            "max_runtime_in_seconds": 3600,
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    // main runs the Pulumi program; the callback declares one SageMaker training job.
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket
    		// are not declared in this snippet; they are assumed to be defined elsewhere.
    		algorithm := &sagemaker.TrainingJobAlgorithmSpecificationArgs{
    			TrainingInputMode: pulumi.String("File"),
    			TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
    		}
    		output := &sagemaker.TrainingJobOutputDataConfigArgs{
    			S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
    		}
    		// One ml.m5.large instance with a 30 GB volume.
    		compute := &sagemaker.TrainingJobResourceConfigArgs{
    			InstanceType:   pulumi.String("ml.m5.large"),
    			InstanceCount:  pulumi.Int(1),
    			VolumeSizeInGb: pulumi.Int(30),
    		}
    		// Runtime cap of 3600 seconds (one hour).
    		limits := &sagemaker.TrainingJobStoppingConditionArgs{
    			MaxRuntimeInSeconds: pulumi.Int(3600),
    		}
    
    		// The resource handle is not used afterwards; only the error is propagated.
    		_, err := sagemaker.NewTrainingJob(ctx, "example", &sagemaker.TrainingJobArgs{
    			TrainingJobName:        pulumi.String("example"),
    			RoleArn:                pulumi.Any(exampleAwsIamRole.Arn),
    			AlgorithmSpecification: algorithm,
    			OutputDataConfig:       output,
    			ResourceConfig:         compute,
    			StoppingCondition:      limits,
    		})
    		return err
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    // Declares a SageMaker training job named "example".
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
    // not declared in this snippet; they are assumed to be defined elsewhere in the program.
    return await Deployment.RunAsync(() =>
    {
        var example = new Aws.Sagemaker.TrainingJob("example", new()
        {
            TrainingJobName = "example",
            RoleArn = exampleAwsIamRole.Arn,
            AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
            {
                TrainingInputMode = "File",
                TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
            },
            OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
            {
                // Output.Format resolves the bucket name even when it is an Output<string>;
                // plain string interpolation would call ToString() on the Output wrapper.
                S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
            },
            // One ml.m5.large instance with a 30 GB volume.
            ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
            {
                InstanceType = "ml.m5.large",
                InstanceCount = 1,
                VolumeSizeInGb = 30,
            },
            // Runtime cap of 3600 seconds (one hour).
            StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
            {
                MaxRuntimeInSeconds = 3600,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.TrainingJob;
    import com.pulumi.aws.sagemaker.TrainingJobArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    /**
     * Pulumi program that declares a single SageMaker training job named "example".
     *
     * NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
     * not declared in this snippet; they are assumed to be defined elsewhere.
     */
    public class App {
        /** Hands the stack definition to the Pulumi runtime. */
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        /** Declares the resources of the stack. */
        public static void stack(Context ctx) {
            var example = new TrainingJob("example", TrainingJobArgs.builder()
                .trainingJobName("example")
                .roleArn(exampleAwsIamRole.arn())
                .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                    .trainingInputMode("File")
                    .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                    .build())
                .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                    // Output.format resolves the bucket name even when it is an Output<String>;
                    // String.format would embed the Output object's toString() instead.
                    .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                    .build())
                // One ml.m5.large instance with a 30 GB volume.
                .resourceConfig(TrainingJobResourceConfigArgs.builder()
                    .instanceType("ml.m5.large")
                    .instanceCount(1)
                    .volumeSizeInGb(30)
                    .build())
                // Runtime cap of 3600 seconds (one hour).
                .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                    .maxRuntimeInSeconds(3600)
                    .build())
                .build());
    
        }
    }
    
    # Declares a single SageMaker training job named "example".
    # NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
    # referenced via ${...} but not declared in this snippet; assumed to be defined elsewhere.
    resources:
      example:
        type: aws:sagemaker:TrainingJob
        properties:
          trainingJobName: example
          roleArn: ${exampleAwsIamRole.arn}
          algorithmSpecification:
            trainingInputMode: File
            trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
          outputDataConfig:
            # The bucket name is interpolated into the S3 URI.
            s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
          # One ml.m5.large instance with a 30 GB volume.
          resourceConfig:
            instanceType: ml.m5.large
            instanceCount: 1
            volumeSizeInGb: 30
          # Runtime cap of 3600 seconds (one hour).
          stoppingCondition:
            maxRuntimeInSeconds: 3600
    

    With VPC Configuration

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    // Declares a SageMaker training job named "example" with a VPC configuration.
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket,
    // exampleAwsSecurityGroup and exampleAwsSubnet are not declared in this snippet; they are
    // assumed to be defined elsewhere in the program.
    const example = new aws.sagemaker.TrainingJob("example", {
        trainingJobName: "example",
        roleArn: exampleAwsIamRole.arn,
        algorithmSpecification: {
            trainingInputMode: "File",
            trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
        },
        outputDataConfig: {
            // pulumi.interpolate resolves the bucket name even when it is an Output<string>;
            // a plain template literal would stringify the Output wrapper instead of its value.
            s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
        },
        // One ml.m5.large instance with a 30 GB volume.
        resourceConfig: {
            instanceType: "ml.m5.large",
            instanceCount: 1,
            volumeSizeInGb: 30,
        },
        // Runtime cap of 3600 seconds (one hour).
        stoppingCondition: {
            maxRuntimeInSeconds: 3600,
        },
        // One security group and one subnet, both passed as single-element lists.
        vpcConfig: {
            securityGroupIds: [exampleAwsSecurityGroup.id],
            subnets: [exampleAwsSubnet.id],
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    # Declares a SageMaker training job named "example" with a VPC configuration.
    # NOTE: example_aws_iam_role, example_aws_sagemaker_prebuilt_ecr_image,
    # example_aws_s3_bucket, example_aws_security_group and example_aws_subnet are not
    # defined in this snippet; they are assumed to be defined elsewhere in the program.
    example = aws.sagemaker.TrainingJob("example",
        training_job_name="example",
        role_arn=example_aws_iam_role["arn"],
        algorithm_specification={
            "training_input_mode": "File",
            "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
        },
        output_data_config={
            # Output.format resolves the bucket name even when it is a pulumi.Output;
            # an f-string would embed the Output object's string form instead of its value.
            "s3_output_path": pulumi.Output.format("s3://{0}/output/", example_aws_s3_bucket["bucket"]),
        },
        # One ml.m5.large instance with a 30 GB volume.
        resource_config={
            "instance_type": "ml.m5.large",
            "instance_count": 1,
            "volume_size_in_gb": 30,
        },
        # Runtime cap of 3600 seconds (one hour).
        stopping_condition={
            "max_runtime_in_seconds": 3600,
        },
        # One security group and one subnet, both passed as single-element lists.
        vpc_config={
            "security_group_ids": [example_aws_security_group["id"]],
            "subnets": [example_aws_subnet["id"]],
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    // main runs the Pulumi program; the callback declares one SageMaker training job
    // that carries a VPC configuration.
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket,
    		// exampleAwsSecurityGroup and exampleAwsSubnet are not declared in this snippet;
    		// they are assumed to be defined elsewhere.
    		algorithm := &sagemaker.TrainingJobAlgorithmSpecificationArgs{
    			TrainingInputMode: pulumi.String("File"),
    			TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
    		}
    		output := &sagemaker.TrainingJobOutputDataConfigArgs{
    			S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
    		}
    		// One ml.m5.large instance with a 30 GB volume.
    		compute := &sagemaker.TrainingJobResourceConfigArgs{
    			InstanceType:   pulumi.String("ml.m5.large"),
    			InstanceCount:  pulumi.Int(1),
    			VolumeSizeInGb: pulumi.Int(30),
    		}
    		// Runtime cap of 3600 seconds (one hour).
    		limits := &sagemaker.TrainingJobStoppingConditionArgs{
    			MaxRuntimeInSeconds: pulumi.Int(3600),
    		}
    		// One security group and one subnet, each as a single-element array.
    		network := &sagemaker.TrainingJobVpcConfigArgs{
    			SecurityGroupIds: pulumi.StringArray{
    				exampleAwsSecurityGroup.Id,
    			},
    			Subnets: pulumi.StringArray{
    				exampleAwsSubnet.Id,
    			},
    		}
    
    		// The resource handle is not used afterwards; only the error is propagated.
    		_, err := sagemaker.NewTrainingJob(ctx, "example", &sagemaker.TrainingJobArgs{
    			TrainingJobName:        pulumi.String("example"),
    			RoleArn:                pulumi.Any(exampleAwsIamRole.Arn),
    			AlgorithmSpecification: algorithm,
    			OutputDataConfig:       output,
    			ResourceConfig:         compute,
    			StoppingCondition:      limits,
    			VpcConfig:              network,
    		})
    		return err
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    // Declares a SageMaker training job named "example" with a VPC configuration.
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket,
    // exampleAwsSecurityGroup and exampleAwsSubnet are not declared in this snippet; they are
    // assumed to be defined elsewhere in the program.
    return await Deployment.RunAsync(() =>
    {
        var example = new Aws.Sagemaker.TrainingJob("example", new()
        {
            TrainingJobName = "example",
            RoleArn = exampleAwsIamRole.Arn,
            AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
            {
                TrainingInputMode = "File",
                TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
            },
            OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
            {
                // Output.Format resolves the bucket name even when it is an Output<string>;
                // plain string interpolation would call ToString() on the Output wrapper.
                S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
            },
            // One ml.m5.large instance with a 30 GB volume.
            ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
            {
                InstanceType = "ml.m5.large",
                InstanceCount = 1,
                VolumeSizeInGb = 30,
            },
            // Runtime cap of 3600 seconds (one hour).
            StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
            {
                MaxRuntimeInSeconds = 3600,
            },
            // One security group and one subnet, each as a single-element array.
            VpcConfig = new Aws.Sagemaker.Inputs.TrainingJobVpcConfigArgs
            {
                SecurityGroupIds = new[]
                {
                    exampleAwsSecurityGroup.Id,
                },
                Subnets = new[]
                {
                    exampleAwsSubnet.Id,
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.TrainingJob;
    import com.pulumi.aws.sagemaker.TrainingJobArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobVpcConfigArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    /**
     * Pulumi program that declares a single SageMaker training job named "example"
     * with a VPC configuration.
     *
     * NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket,
     * exampleAwsSecurityGroup and exampleAwsSubnet are not declared in this snippet; they are
     * assumed to be defined elsewhere.
     */
    public class App {
        /** Hands the stack definition to the Pulumi runtime. */
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        /** Declares the resources of the stack. */
        public static void stack(Context ctx) {
            var example = new TrainingJob("example", TrainingJobArgs.builder()
                .trainingJobName("example")
                .roleArn(exampleAwsIamRole.arn())
                .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                    .trainingInputMode("File")
                    .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                    .build())
                .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                    // Output.format resolves the bucket name even when it is an Output<String>;
                    // String.format would embed the Output object's toString() instead.
                    .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                    .build())
                // One ml.m5.large instance with a 30 GB volume.
                .resourceConfig(TrainingJobResourceConfigArgs.builder()
                    .instanceType("ml.m5.large")
                    .instanceCount(1)
                    .volumeSizeInGb(30)
                    .build())
                // Runtime cap of 3600 seconds (one hour).
                .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                    .maxRuntimeInSeconds(3600)
                    .build())
                // One security group and one subnet.
                .vpcConfig(TrainingJobVpcConfigArgs.builder()
                    .securityGroupIds(exampleAwsSecurityGroup.id())
                    .subnets(exampleAwsSubnet.id())
                    .build())
                .build());
    
        }
    }
    
    # Declares a single SageMaker training job named "example" with a VPC configuration.
    # NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket,
    # exampleAwsSecurityGroup and exampleAwsSubnet are referenced via ${...} but not declared
    # in this snippet; assumed to be defined elsewhere.
    resources:
      example:
        type: aws:sagemaker:TrainingJob
        properties:
          trainingJobName: example
          roleArn: ${exampleAwsIamRole.arn}
          algorithmSpecification:
            trainingInputMode: File
            trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
          outputDataConfig:
            # The bucket name is interpolated into the S3 URI.
            s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
          # One ml.m5.large instance with a 30 GB volume.
          resourceConfig:
            instanceType: ml.m5.large
            instanceCount: 1
            volumeSizeInGb: 30
          # Runtime cap of 3600 seconds (one hour).
          stoppingCondition:
            maxRuntimeInSeconds: 3600
          # One security group and one subnet, each given as a single-item list.
          vpcConfig:
            securityGroupIds:
              - ${exampleAwsSecurityGroup.id}
            subnets:
              - ${exampleAwsSubnet.id}
    

    With Input Data and Hyperparameters

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    // Declares a SageMaker training job named "example" with hyperparameters and one
    // S3-backed input channel.
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
    // not declared in this snippet; they are assumed to be defined elsewhere in the program.
    const example = new aws.sagemaker.TrainingJob("example", {
        trainingJobName: "example",
        roleArn: exampleAwsIamRole.arn,
        algorithmSpecification: {
            trainingInputMode: "File",
            trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
            enableSagemakerMetricsTimeSeries: true,
        },
        // Hyperparameter values are passed as strings.
        hyperParameters: {
            mini_batch_size: "200",
            epochs: "10",
        },
        // A single channel named "train" that reads from the bucket's train/ prefix.
        inputDataConfigs: [{
            channelName: "train",
            dataSource: {
                s3DataSource: {
                    s3DataType: "S3Prefix",
                    // pulumi.interpolate resolves the bucket name even when it is an
                    // Output<string>; a plain template literal would stringify the wrapper.
                    s3Uri: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/train/`,
                },
            },
        }],
        outputDataConfig: {
            // Same Output-aware interpolation as for the input URI.
            s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
        },
        // One ml.m5.large instance with a 30 GB volume.
        resourceConfig: {
            instanceType: "ml.m5.large",
            instanceCount: 1,
            volumeSizeInGb: 30,
        },
        // Runtime cap of 3600 seconds (one hour).
        stoppingCondition: {
            maxRuntimeInSeconds: 3600,
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    # Declares a SageMaker training job named "example" with hyperparameters and one
    # S3-backed input channel.
    # NOTE: example_aws_iam_role, example_aws_sagemaker_prebuilt_ecr_image and
    # example_aws_s3_bucket are not defined in this snippet; they are assumed to be
    # defined elsewhere in the program.
    example = aws.sagemaker.TrainingJob("example",
        training_job_name="example",
        role_arn=example_aws_iam_role["arn"],
        algorithm_specification={
            "training_input_mode": "File",
            "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
            "enable_sagemaker_metrics_time_series": True,
        },
        # Hyperparameter values are passed as strings.
        hyper_parameters={
            "mini_batch_size": "200",
            "epochs": "10",
        },
        # A single channel named "train" that reads from the bucket's train/ prefix.
        input_data_configs=[{
            "channel_name": "train",
            "data_source": {
                "s3_data_source": {
                    "s3_data_type": "S3Prefix",
                    # Output.format resolves the bucket name even when it is a pulumi.Output;
                    # an f-string would embed the Output object's string form instead.
                    "s3_uri": pulumi.Output.format("s3://{0}/train/", example_aws_s3_bucket["bucket"]),
                },
            },
        }],
        output_data_config={
            # Same Output-aware formatting as for the input URI.
            "s3_output_path": pulumi.Output.format("s3://{0}/output/", example_aws_s3_bucket["bucket"]),
        },
        # One ml.m5.large instance with a 30 GB volume.
        resource_config={
            "instance_type": "ml.m5.large",
            "instance_count": 1,
            "volume_size_in_gb": 30,
        },
        # Runtime cap of 3600 seconds (one hour).
        stopping_condition={
            "max_runtime_in_seconds": 3600,
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    // main runs the Pulumi program; the callback declares one SageMaker training job
    // with hyperparameters and a single S3-backed input channel.
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket
    		// are not declared in this snippet; they are assumed to be defined elsewhere.
    		algorithm := &sagemaker.TrainingJobAlgorithmSpecificationArgs{
    			TrainingInputMode:                pulumi.String("File"),
    			TrainingImage:                    pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
    			EnableSagemakerMetricsTimeSeries: pulumi.Bool(true),
    		}
    		// Hyperparameter values are passed as strings.
    		hyperParams := pulumi.StringMap{
    			"mini_batch_size": pulumi.String("200"),
    			"epochs":          pulumi.String("10"),
    		}
    		// A single channel named "train" that reads from the bucket's train/ prefix.
    		trainChannel := &sagemaker.TrainingJobInputDataConfigArgs{
    			ChannelName: pulumi.String("train"),
    			DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceArgs{
    				S3DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceArgs{
    					S3DataType: pulumi.String("S3Prefix"),
    					S3Uri:      pulumi.Sprintf("s3://%v/train/", exampleAwsS3Bucket.Bucket),
    				},
    			},
    		}
    		output := &sagemaker.TrainingJobOutputDataConfigArgs{
    			S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
    		}
    		// One ml.m5.large instance with a 30 GB volume.
    		compute := &sagemaker.TrainingJobResourceConfigArgs{
    			InstanceType:   pulumi.String("ml.m5.large"),
    			InstanceCount:  pulumi.Int(1),
    			VolumeSizeInGb: pulumi.Int(30),
    		}
    		// Runtime cap of 3600 seconds (one hour).
    		limits := &sagemaker.TrainingJobStoppingConditionArgs{
    			MaxRuntimeInSeconds: pulumi.Int(3600),
    		}
    
    		// The resource handle is not used afterwards; only the error is propagated.
    		_, err := sagemaker.NewTrainingJob(ctx, "example", &sagemaker.TrainingJobArgs{
    			TrainingJobName:        pulumi.String("example"),
    			RoleArn:                pulumi.Any(exampleAwsIamRole.Arn),
    			AlgorithmSpecification: algorithm,
    			HyperParameters:        hyperParams,
    			InputDataConfigs:       sagemaker.TrainingJobInputDataConfigArray{trainChannel},
    			OutputDataConfig:       output,
    			ResourceConfig:         compute,
    			StoppingCondition:      limits,
    		})
    		return err
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    // Declares a SageMaker training job named "example" with hyperparameters and one
    // S3-backed input channel.
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
    // not declared in this snippet; they are assumed to be defined elsewhere in the program.
    return await Deployment.RunAsync(() =>
    {
        var example = new Aws.Sagemaker.TrainingJob("example", new()
        {
            TrainingJobName = "example",
            RoleArn = exampleAwsIamRole.Arn,
            AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
            {
                TrainingInputMode = "File",
                TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
                EnableSagemakerMetricsTimeSeries = true,
            },
            // Hyperparameter values are passed as strings.
            HyperParameters =
            {
                { "mini_batch_size", "200" },
                { "epochs", "10" },
            },
            // A single channel named "train" that reads from the bucket's train/ prefix.
            InputDataConfigs = new[]
            {
                new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
                {
                    ChannelName = "train",
                    DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
                    {
                        S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
                        {
                            S3DataType = "S3Prefix",
                            // Output.Format resolves the bucket name even when it is an
                            // Output<string>; plain interpolation would call ToString() on it.
                            S3Uri = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/train/"),
                        },
                    },
                },
            },
            OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
            {
                // Same Output-aware formatting as for the input URI.
                S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
            },
            // One ml.m5.large instance with a 30 GB volume.
            ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
            {
                InstanceType = "ml.m5.large",
                InstanceCount = 1,
                VolumeSizeInGb = 30,
            },
            // Runtime cap of 3600 seconds (one hour).
            StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
            {
                MaxRuntimeInSeconds = 3600,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.TrainingJob;
    import com.pulumi.aws.sagemaker.TrainingJobArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    /**
     * Pulumi program that declares a single SageMaker training job named "example"
     * with hyperparameters and one S3-backed input channel.
     *
     * NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
     * not declared in this snippet; they are assumed to be defined elsewhere.
     */
    public class App {
        /** Hands the stack definition to the Pulumi runtime. */
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        /** Declares the resources of the stack. */
        public static void stack(Context ctx) {
            var example = new TrainingJob("example", TrainingJobArgs.builder()
                .trainingJobName("example")
                .roleArn(exampleAwsIamRole.arn())
                .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                    .trainingInputMode("File")
                    .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                    .enableSagemakerMetricsTimeSeries(true)
                    .build())
                // Hyperparameter values are passed as strings.
                .hyperParameters(Map.ofEntries(
                    Map.entry("mini_batch_size", "200"),
                    Map.entry("epochs", "10")
                ))
                // A single channel named "train" that reads from the bucket's train/ prefix.
                .inputDataConfigs(TrainingJobInputDataConfigArgs.builder()
                    .channelName("train")
                    .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
                        .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                            .s3DataType("S3Prefix")
                            // Output.format resolves the bucket name even when it is an
                            // Output<String>; String.format would embed its toString() instead.
                            .s3Uri(Output.format("s3://%s/train/", exampleAwsS3Bucket.bucket()))
                            .build())
                        .build())
                    .build())
                .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                    // Same Output-aware formatting as for the input URI.
                    .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                    .build())
                // One ml.m5.large instance with a 30 GB volume.
                .resourceConfig(TrainingJobResourceConfigArgs.builder()
                    .instanceType("ml.m5.large")
                    .instanceCount(1)
                    .volumeSizeInGb(30)
                    .build())
                // Runtime cap of 3600 seconds (one hour).
                .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                    .maxRuntimeInSeconds(3600)
                    .build())
                .build());
    
        }
    }
    
    # Declares a single SageMaker training job named "example" with hyperparameters and
    # one S3-backed input channel.
    # NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and exampleAwsS3Bucket are
    # referenced via ${...} but not declared in this snippet; assumed to be defined elsewhere.
    resources:
      example:
        type: aws:sagemaker:TrainingJob
        properties:
          trainingJobName: example
          roleArn: ${exampleAwsIamRole.arn}
          algorithmSpecification:
            trainingInputMode: File
            trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
            enableSagemakerMetricsTimeSeries: true
          # Values are quoted so that YAML reads them as strings rather than integers.
          hyperParameters:
            mini_batch_size: '200'
            epochs: '10'
          # A single channel named "train" that reads from the bucket's train/ prefix.
          inputDataConfigs:
            - channelName: train
              dataSource:
                s3DataSource:
                  s3DataType: S3Prefix
                  s3Uri: s3://${exampleAwsS3Bucket.bucket}/train/
          outputDataConfig:
            # The bucket name is interpolated into the S3 URI.
            s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
          # One ml.m5.large instance with a 30 GB volume.
          resourceConfig:
            instanceType: ml.m5.large
            instanceCount: 1
            volumeSizeInGb: 30
          # Runtime cap of 3600 seconds (one hour).
          stoppingCondition:
            maxRuntimeInSeconds: 3600
    

    With Encrypted Output, Checkpoints, and TensorBoard

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    // Declares a SageMaker training job named "example" with checkpointing, KMS-keyed
    // output and volume settings, and a TensorBoard output location.
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket and
    // exampleAwsKmsKey are not declared in this snippet; they are assumed to be defined
    // elsewhere in the program.
    const example = new aws.sagemaker.TrainingJob("example", {
        trainingJobName: "example",
        roleArn: exampleAwsIamRole.arn,
        algorithmSpecification: {
            trainingInputMode: "File",
            trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
        },
        // Pairs the local path /opt/ml/checkpoints with the bucket's checkpoints/ prefix.
        checkpointConfig: {
            localPath: "/opt/ml/checkpoints",
            // pulumi.interpolate resolves the bucket name even when it is an Output<string>;
            // a plain template literal would stringify the Output wrapper instead of its value.
            s3Uri: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/checkpoints/`,
        },
        outputDataConfig: {
            compressionType: "GZIP",
            kmsKeyId: exampleAwsKmsKey.arn,
            s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
        },
        // One ml.m5.large instance with a 30 GB volume; the same KMS key ARN is set for the volume.
        resourceConfig: {
            instanceType: "ml.m5.large",
            instanceCount: 1,
            volumeSizeInGb: 30,
            volumeKmsKeyId: exampleAwsKmsKey.arn,
        },
        // Runtime cap of 3600 seconds (one hour).
        stoppingCondition: {
            maxRuntimeInSeconds: 3600,
        },
        // Pairs the local path /opt/ml/output/tensorboard with the bucket's tensorboard/ prefix.
        tensorBoardOutputConfig: {
            localPath: "/opt/ml/output/tensorboard",
            s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/tensorboard/`,
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    # Declares a SageMaker training job named "example" with checkpointing, KMS-keyed
    # output and volume settings, and a TensorBoard output location.
    # NOTE: example_aws_iam_role, example_aws_sagemaker_prebuilt_ecr_image,
    # example_aws_s3_bucket and example_aws_kms_key are not defined in this snippet; they
    # are assumed to be defined elsewhere in the program.
    example = aws.sagemaker.TrainingJob("example",
        training_job_name="example",
        role_arn=example_aws_iam_role["arn"],
        algorithm_specification={
            "training_input_mode": "File",
            "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
        },
        # Pairs the local path /opt/ml/checkpoints with the bucket's checkpoints/ prefix.
        checkpoint_config={
            "local_path": "/opt/ml/checkpoints",
            # Output.format resolves the bucket name even when it is a pulumi.Output;
            # an f-string would embed the Output object's string form instead of its value.
            "s3_uri": pulumi.Output.format("s3://{0}/checkpoints/", example_aws_s3_bucket["bucket"]),
        },
        output_data_config={
            "compression_type": "GZIP",
            "kms_key_id": example_aws_kms_key["arn"],
            "s3_output_path": pulumi.Output.format("s3://{0}/output/", example_aws_s3_bucket["bucket"]),
        },
        # One ml.m5.large instance with a 30 GB volume; the same KMS key ARN is set for the volume.
        resource_config={
            "instance_type": "ml.m5.large",
            "instance_count": 1,
            "volume_size_in_gb": 30,
            "volume_kms_key_id": example_aws_kms_key["arn"],
        },
        # Runtime cap of 3600 seconds (one hour).
        stopping_condition={
            "max_runtime_in_seconds": 3600,
        },
        # Pairs the local path /opt/ml/output/tensorboard with the bucket's tensorboard/ prefix.
        tensor_board_output_config={
            "local_path": "/opt/ml/output/tensorboard",
            "s3_output_path": pulumi.Output.format("s3://{0}/tensorboard/", example_aws_s3_bucket["bucket"]),
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    // main runs the Pulumi program; the callback declares one SageMaker training job with
    // checkpointing, KMS-keyed output and volume settings, and a TensorBoard output location.
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket and
    		// exampleAwsKmsKey are not declared in this snippet; they are assumed to be defined
    		// elsewhere.
    		algorithm := &sagemaker.TrainingJobAlgorithmSpecificationArgs{
    			TrainingInputMode: pulumi.String("File"),
    			TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
    		}
    		// Pairs the local path /opt/ml/checkpoints with the bucket's checkpoints/ prefix.
    		checkpoints := &sagemaker.TrainingJobCheckpointConfigArgs{
    			LocalPath: pulumi.String("/opt/ml/checkpoints"),
    			S3Uri:     pulumi.Sprintf("s3://%v/checkpoints/", exampleAwsS3Bucket.Bucket),
    		}
    		output := &sagemaker.TrainingJobOutputDataConfigArgs{
    			CompressionType: pulumi.String("GZIP"),
    			KmsKeyId:        pulumi.Any(exampleAwsKmsKey.Arn),
    			S3OutputPath:    pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
    		}
    		// One ml.m5.large instance with a 30 GB volume; the same KMS key ARN is set for the volume.
    		compute := &sagemaker.TrainingJobResourceConfigArgs{
    			InstanceType:   pulumi.String("ml.m5.large"),
    			InstanceCount:  pulumi.Int(1),
    			VolumeSizeInGb: pulumi.Int(30),
    			VolumeKmsKeyId: pulumi.Any(exampleAwsKmsKey.Arn),
    		}
    		// Runtime cap of 3600 seconds (one hour).
    		limits := &sagemaker.TrainingJobStoppingConditionArgs{
    			MaxRuntimeInSeconds: pulumi.Int(3600),
    		}
    		// Pairs the local path /opt/ml/output/tensorboard with the bucket's tensorboard/ prefix.
    		tensorBoard := &sagemaker.TrainingJobTensorBoardOutputConfigArgs{
    			LocalPath:    pulumi.String("/opt/ml/output/tensorboard"),
    			S3OutputPath: pulumi.Sprintf("s3://%v/tensorboard/", exampleAwsS3Bucket.Bucket),
    		}
    
    		// The resource handle is not used afterwards; only the error is propagated.
    		_, err := sagemaker.NewTrainingJob(ctx, "example", &sagemaker.TrainingJobArgs{
    			TrainingJobName:         pulumi.String("example"),
    			RoleArn:                 pulumi.Any(exampleAwsIamRole.Arn),
    			AlgorithmSpecification:  algorithm,
    			CheckpointConfig:        checkpoints,
    			OutputDataConfig:        output,
    			ResourceConfig:          compute,
    			StoppingCondition:       limits,
    			TensorBoardOutputConfig: tensorBoard,
    		})
    		return err
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    // Declares a SageMaker training job named "example" with checkpointing, KMS-keyed
    // output and volume settings, and a TensorBoard output location.
    // NOTE: exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage, exampleAwsS3Bucket and
    // exampleAwsKmsKey are not declared in this snippet; they are assumed to be defined
    // elsewhere in the program.
    return await Deployment.RunAsync(() =>
    {
        var example = new Aws.Sagemaker.TrainingJob("example", new()
        {
            TrainingJobName = "example",
            RoleArn = exampleAwsIamRole.Arn,
            AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
            {
                TrainingInputMode = "File",
                TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
            },
            // Pairs the local path /opt/ml/checkpoints with the bucket's checkpoints/ prefix.
            CheckpointConfig = new Aws.Sagemaker.Inputs.TrainingJobCheckpointConfigArgs
            {
                LocalPath = "/opt/ml/checkpoints",
                // Output.Format resolves the bucket name even when it is an Output<string>;
                // plain string interpolation would call ToString() on the Output wrapper.
                S3Uri = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/checkpoints/"),
            },
            OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
            {
                CompressionType = "GZIP",
                KmsKeyId = exampleAwsKmsKey.Arn,
                S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
            },
            // One ml.m5.large instance with a 30 GB volume; the same KMS key ARN is set for the volume.
            ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
            {
                InstanceType = "ml.m5.large",
                InstanceCount = 1,
                VolumeSizeInGb = 30,
                VolumeKmsKeyId = exampleAwsKmsKey.Arn,
            },
            // Runtime cap of 3600 seconds (one hour).
            StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
            {
                MaxRuntimeInSeconds = 3600,
            },
            // Pairs the local path /opt/ml/output/tensorboard with the bucket's tensorboard/ prefix.
            TensorBoardOutputConfig = new Aws.Sagemaker.Inputs.TrainingJobTensorBoardOutputConfigArgs
            {
                LocalPath = "/opt/ml/output/tensorboard",
                S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/tensorboard/"),
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.TrainingJob;
    import com.pulumi.aws.sagemaker.TrainingJobArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobCheckpointConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobTensorBoardOutputConfigArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    /**
     * Pulumi program declaring a SageMaker AI training job with a checkpoint
     * location, GZIP-compressed output, KMS key references and TensorBoard output.
     */
    public class App {
        /** Hands the stack definition to the Pulumi runtime. */
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        /**
         * Declares the {@code example} training job.
         *
         * <p>Every S3 location is assembled with {@code Output.format} instead of
         * {@code String.format}: when the bucket name is a Pulumi {@code Output},
         * {@code String.format} would embed the Output object's string form
         * rather than the resolved bucket name. {@code Output.format} also accepts
         * plain strings, so either kind of bucket reference works.
         *
         * @param ctx the stack context passed in by {@code Pulumi.run}
         */
        public static void stack(Context ctx) {
            var example = new TrainingJob("example", TrainingJobArgs.builder()
                .trainingJobName("example")
                .roleArn(exampleAwsIamRole.arn())
                .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                    .trainingInputMode("File")
                    .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                    .build())
                // Container-local checkpoint directory paired with an S3 prefix.
                .checkpointConfig(TrainingJobCheckpointConfigArgs.builder()
                    .localPath("/opt/ml/checkpoints")
                    .s3Uri(Output.format("s3://%s/checkpoints/", exampleAwsS3Bucket.bucket()))
                    .build())
                .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                    .compressionType("GZIP")
                    .kmsKeyId(exampleAwsKmsKey.arn())
                    .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                    .build())
                .resourceConfig(TrainingJobResourceConfigArgs.builder()
                    .instanceType("ml.m5.large")
                    .instanceCount(1)
                    .volumeSizeInGb(30)
                    .volumeKmsKeyId(exampleAwsKmsKey.arn())
                    .build())
                .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                    .maxRuntimeInSeconds(3600)
                    .build())
                .tensorBoardOutputConfig(TrainingJobTensorBoardOutputConfigArgs.builder()
                    .localPath("/opt/ml/output/tensorboard")
                    .s3OutputPath(Output.format("s3://%s/tensorboard/", exampleAwsS3Bucket.bucket()))
                    .build())
                .build());
    
        }
    }
    
    # SageMaker AI training job with a checkpoint location, GZIP-compressed
    # output, KMS key references and TensorBoard output.
    resources:
      example:
        type: "aws:sagemaker:TrainingJob"
        properties:
          trainingJobName: "example"
          roleArn: "${exampleAwsIamRole.arn}"
          # Training image and the mode used to deliver input data to it.
          algorithmSpecification:
            trainingImage: "${exampleAwsSagemakerPrebuiltEcrImage.registryPath}"
            trainingInputMode: "File"
          # Container-local checkpoint directory paired with an S3 prefix.
          checkpointConfig:
            s3Uri: "s3://${exampleAwsS3Bucket.bucket}/checkpoints/"
            localPath: "/opt/ml/checkpoints"
          outputDataConfig:
            s3OutputPath: "s3://${exampleAwsS3Bucket.bucket}/output/"
            kmsKeyId: "${exampleAwsKmsKey.arn}"
            compressionType: "GZIP"
          resourceConfig:
            instanceCount: 1
            instanceType: "ml.m5.large"
            volumeKmsKeyId: "${exampleAwsKmsKey.arn}"
            volumeSizeInGb: 30
          stoppingCondition: {maxRuntimeInSeconds: 3600}
          tensorBoardOutputConfig:
            s3OutputPath: "s3://${exampleAwsS3Bucket.bucket}/tensorboard/"
            localPath: "/opt/ml/output/tensorboard"
    

    With Managed Spot Training and Custom Metrics

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    // SageMaker AI training job with managed spot training enabled, a custom
    // container entrypoint with arguments, and two metric definitions.
    const example = new aws.sagemaker.TrainingJob("example", {
        trainingJobName: "example",
        roleArn: exampleAwsIamRole.arn,
        enableManagedSpotTraining: true,
        enableNetworkIsolation: true,
        enableInterContainerTrafficEncryption: true,
        algorithmSpecification: {
            trainingInputMode: "File",
            trainingImage: trainingImage,
            containerEntrypoints: [
                "python",
                "/opt/ml/code/train.py",
            ],
            containerArguments: [
                "--epochs",
                "10",
                "--batch-size",
                "128",
            ],
            // Each entry pairs a metric name with the regex whose capture group holds its value.
            metricDefinitions: [
                {
                    name: "train:loss",
                    regex: "loss: ([0-9\\.]+)",
                },
                {
                    name: "validation:accuracy",
                    regex: "accuracy: ([0-9\\.]+)",
                },
            ],
        },
        environment: {
            MODEL_DIR: "/opt/ml/model",
            SM_LOG_LEVEL: "20",
        },
        hyperParameters: {
            epochs: "10",
            batch_size: "128",
        },
        outputDataConfig: {
            // pulumi.interpolate resolves the bucket name before building the
            // path, so this works whether the bucket reference is a plain string
            // or a pulumi.Output (a bare template literal would stringify the
            // Output object instead of its value).
            s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
        },
        resourceConfig: {
            instanceType: "ml.m5.xlarge",
            instanceCount: 1,
            volumeSizeInGb: 50,
            keepAlivePeriodInSeconds: 600,
        },
        retryStrategy: {
            maximumRetryAttempts: 3,
        },
        stoppingCondition: {
            maxRuntimeInSeconds: 3600,
            maxWaitTimeInSeconds: 7200,
        },
        tags: {
            Environment: "test",
            Workload: "training",
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    # SageMaker AI training job with managed spot training enabled, a custom
    # container entrypoint with arguments, and two metric definitions.
    example = aws.sagemaker.TrainingJob("example",
        training_job_name="example",
        role_arn=example_aws_iam_role["arn"],
        enable_managed_spot_training=True,
        enable_network_isolation=True,
        enable_inter_container_traffic_encryption=True,
        algorithm_specification={
            "training_input_mode": "File",
            "training_image": training_image,
            "container_entrypoints": [
                "python",
                "/opt/ml/code/train.py",
            ],
            "container_arguments": [
                "--epochs",
                "10",
                "--batch-size",
                "128",
            ],
            # Each entry pairs a metric name with the regex whose capture group holds its value.
            "metric_definitions": [
                {
                    "name": "train:loss",
                    "regex": "loss: ([0-9\\.]+)",
                },
                {
                    "name": "validation:accuracy",
                    "regex": "accuracy: ([0-9\\.]+)",
                },
            ],
        },
        environment={
            "MODEL_DIR": "/opt/ml/model",
            "SM_LOG_LEVEL": "20",
        },
        hyper_parameters={
            "epochs": "10",
            "batch_size": "128",
        },
        output_data_config={
            # Output.concat resolves the bucket name before joining the pieces,
            # so the path is correct whether the bucket reference is a plain str
            # or a pulumi.Output (an f-string would embed the Output object's
            # string form instead of its value).
            "s3_output_path": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/output/"),
        },
        resource_config={
            "instance_type": "ml.m5.xlarge",
            "instance_count": 1,
            "volume_size_in_gb": 50,
            "keep_alive_period_in_seconds": 600,
        },
        retry_strategy={
            "maximum_retry_attempts": 3,
        },
        stopping_condition={
            "max_runtime_in_seconds": 3600,
            "max_wait_time_in_seconds": 7200,
        },
        tags={
            "Environment": "test",
            "Workload": "training",
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    // main declares a single SageMaker training job with managed spot training
    // enabled, a custom container entrypoint and two metric definitions.
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// Container image, entrypoint, arguments and metric definitions.
    		algorithm := &sagemaker.TrainingJobAlgorithmSpecificationArgs{
    			TrainingInputMode: pulumi.String("File"),
    			TrainingImage:     pulumi.Any(trainingImage),
    			ContainerEntrypoints: pulumi.StringArray{
    				pulumi.String("python"),
    				pulumi.String("/opt/ml/code/train.py"),
    			},
    			ContainerArguments: pulumi.StringArray{
    				pulumi.String("--epochs"),
    				pulumi.String("10"),
    				pulumi.String("--batch-size"),
    				pulumi.String("128"),
    			},
    			MetricDefinitions: sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArray{
    				&sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArgs{
    					Name:  pulumi.String("train:loss"),
    					Regex: pulumi.String("loss: ([0-9\\.]+)"),
    				},
    				&sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArgs{
    					Name:  pulumi.String("validation:accuracy"),
    					Regex: pulumi.String("accuracy: ([0-9\\.]+)"),
    				},
    			},
    		}
    
    		// Instance settings, including the keep-alive period.
    		resources := &sagemaker.TrainingJobResourceConfigArgs{
    			InstanceType:             pulumi.String("ml.m5.xlarge"),
    			InstanceCount:            pulumi.Int(1),
    			VolumeSizeInGb:           pulumi.Int(50),
    			KeepAlivePeriodInSeconds: pulumi.Int(600),
    		}
    
    		jobArgs := &sagemaker.TrainingJobArgs{
    			TrainingJobName:                       pulumi.String("example"),
    			RoleArn:                               pulumi.Any(exampleAwsIamRole.Arn),
    			EnableManagedSpotTraining:             pulumi.Bool(true),
    			EnableNetworkIsolation:                pulumi.Bool(true),
    			EnableInterContainerTrafficEncryption: pulumi.Bool(true),
    			AlgorithmSpecification:                algorithm,
    			Environment: pulumi.StringMap{
    				"MODEL_DIR":    pulumi.String("/opt/ml/model"),
    				"SM_LOG_LEVEL": pulumi.String("20"),
    			},
    			HyperParameters: pulumi.StringMap{
    				"epochs":     pulumi.String("10"),
    				"batch_size": pulumi.String("128"),
    			},
    			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
    				S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
    			},
    			ResourceConfig: resources,
    			RetryStrategy: &sagemaker.TrainingJobRetryStrategyArgs{
    				MaximumRetryAttempts: pulumi.Int(3),
    			},
    			StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
    				MaxRuntimeInSeconds:  pulumi.Int(3600),
    				MaxWaitTimeInSeconds: pulumi.Int(7200),
    			},
    			Tags: pulumi.StringMap{
    				"Environment": pulumi.String("test"),
    				"Workload":    pulumi.String("training"),
    			},
    		}
    
    		// The resource handle is not needed afterwards; only the error is propagated.
    		_, err := sagemaker.NewTrainingJob(ctx, "example", jobArgs)
    		return err
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    // Declares a SageMaker AI training job with managed spot training enabled,
    // a custom container entrypoint with arguments, and two metric definitions.
    return await Deployment.RunAsync(() => 
    {
        var example = new Aws.Sagemaker.TrainingJob("example", new()
        {
            TrainingJobName = "example",
            RoleArn = exampleAwsIamRole.Arn,
            EnableManagedSpotTraining = true,
            EnableNetworkIsolation = true,
            EnableInterContainerTrafficEncryption = true,
            AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
            {
                TrainingInputMode = "File",
                TrainingImage = trainingImage,
                ContainerEntrypoints = new[]
                {
                    "python",
                    "/opt/ml/code/train.py",
                },
                ContainerArguments = new[]
                {
                    "--epochs",
                    "10",
                    "--batch-size",
                    "128",
                },
                // Each entry pairs a metric name with the regex whose capture group holds its value.
                MetricDefinitions = new[]
                {
                    new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs
                    {
                        Name = "train:loss",
                        Regex = "loss: ([0-9\\.]+)",
                    },
                    new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs
                    {
                        Name = "validation:accuracy",
                        Regex = "accuracy: ([0-9\\.]+)",
                    },
                },
            },
            Environment = 
            {
                { "MODEL_DIR", "/opt/ml/model" },
                { "SM_LOG_LEVEL", "20" },
            },
            HyperParameters = 
            {
                { "epochs", "10" },
                { "batch_size", "128" },
            },
            OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
            {
                // Output.Format resolves the bucket name before formatting, so the
                // path is correct whether the bucket reference is a plain string or
                // an Output<string> (a bare interpolated string would embed the
                // Output object's string form instead of its value).
                S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
            },
            ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
            {
                InstanceType = "ml.m5.xlarge",
                InstanceCount = 1,
                VolumeSizeInGb = 50,
                KeepAlivePeriodInSeconds = 600,
            },
            RetryStrategy = new Aws.Sagemaker.Inputs.TrainingJobRetryStrategyArgs
            {
                MaximumRetryAttempts = 3,
            },
            StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
            {
                MaxRuntimeInSeconds = 3600,
                MaxWaitTimeInSeconds = 7200,
            },
            Tags = 
            {
                { "Environment", "test" },
                { "Workload", "training" },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.TrainingJob;
    import com.pulumi.aws.sagemaker.TrainingJobArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobRetryStrategyArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    /**
     * Pulumi program declaring a SageMaker AI training job with managed spot
     * training enabled, a custom container entrypoint with arguments, and two
     * metric definitions.
     */
    public class App {
        /** Hands the stack definition to the Pulumi runtime. */
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        /**
         * Declares the {@code example} training job.
         *
         * <p>The output path is assembled with {@code Output.format} instead of
         * {@code String.format}: when the bucket name is a Pulumi {@code Output},
         * {@code String.format} would embed the Output object's string form
         * rather than the resolved bucket name. {@code Output.format} also accepts
         * plain strings, so either kind of bucket reference works.
         *
         * @param ctx the stack context passed in by {@code Pulumi.run}
         */
        public static void stack(Context ctx) {
            var example = new TrainingJob("example", TrainingJobArgs.builder()
                .trainingJobName("example")
                .roleArn(exampleAwsIamRole.arn())
                .enableManagedSpotTraining(true)
                .enableNetworkIsolation(true)
                .enableInterContainerTrafficEncryption(true)
                .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                    .trainingInputMode("File")
                    .trainingImage(trainingImage)
                    .containerEntrypoints(
                        "python",
                        "/opt/ml/code/train.py")
                    .containerArguments(
                        "--epochs",
                        "10",
                        "--batch-size",
                        "128")
                    // Each entry pairs a metric name with the regex whose capture group holds its value.
                    .metricDefinitions(
                        TrainingJobAlgorithmSpecificationMetricDefinitionArgs.builder()
                            .name("train:loss")
                            .regex("loss: ([0-9\\.]+)")
                            .build(),
                        TrainingJobAlgorithmSpecificationMetricDefinitionArgs.builder()
                            .name("validation:accuracy")
                            .regex("accuracy: ([0-9\\.]+)")
                            .build())
                    .build())
                .environment(Map.ofEntries(
                    Map.entry("MODEL_DIR", "/opt/ml/model"),
                    Map.entry("SM_LOG_LEVEL", "20")
                ))
                .hyperParameters(Map.ofEntries(
                    Map.entry("epochs", "10"),
                    Map.entry("batch_size", "128")
                ))
                .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                    .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                    .build())
                .resourceConfig(TrainingJobResourceConfigArgs.builder()
                    .instanceType("ml.m5.xlarge")
                    .instanceCount(1)
                    .volumeSizeInGb(50)
                    .keepAlivePeriodInSeconds(600)
                    .build())
                .retryStrategy(TrainingJobRetryStrategyArgs.builder()
                    .maximumRetryAttempts(3)
                    .build())
                .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                    .maxRuntimeInSeconds(3600)
                    .maxWaitTimeInSeconds(7200)
                    .build())
                .tags(Map.ofEntries(
                    Map.entry("Environment", "test"),
                    Map.entry("Workload", "training")
                ))
                .build());
    
        }
    }
    
    # SageMaker AI training job with managed spot training enabled, a custom
    # container entrypoint with arguments, and two metric definitions.
    resources:
      example:
        type: "aws:sagemaker:TrainingJob"
        properties:
          trainingJobName: "example"
          roleArn: "${exampleAwsIamRole.arn}"
          enableManagedSpotTraining: true
          enableNetworkIsolation: true
          enableInterContainerTrafficEncryption: true
          algorithmSpecification:
            trainingInputMode: "File"
            trainingImage: "${trainingImage}"
            containerEntrypoints: ["python", "/opt/ml/code/train.py"]
            # Numeric arguments stay quoted so they remain strings.
            containerArguments: ["--epochs", "10", "--batch-size", "128"]
            # Each entry pairs a metric name with the regex whose capture group holds its value.
            metricDefinitions:
              - {name: "train:loss", regex: "loss: ([0-9\\.]+)"}
              - {name: "validation:accuracy", regex: "accuracy: ([0-9\\.]+)"}
          environment:
            MODEL_DIR: "/opt/ml/model"
            SM_LOG_LEVEL: "20"
          hyperParameters:
            epochs: "10"
            batch_size: "128"
          outputDataConfig:
            s3OutputPath: "s3://${exampleAwsS3Bucket.bucket}/output/"
          resourceConfig:
            instanceType: "ml.m5.xlarge"
            instanceCount: 1
            volumeSizeInGb: 50
            keepAlivePeriodInSeconds: 600
          retryStrategy: {maximumRetryAttempts: 3}
          stoppingCondition:
            maxRuntimeInSeconds: 3600
            maxWaitTimeInSeconds: 7200
          tags:
            Environment: "test"
            Workload: "training"
    

    With Multiple Input Channels, Infrastructure Checks, and Session Tag Chaining

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    // SageMaker AI training job that reads two CSV input channels from S3 and
    // enables the infrastructure check and session tag chaining options.
    //
    // Every S3 location uses pulumi.interpolate, which resolves the bucket name
    // before building the string. That keeps the paths correct whether the
    // bucket reference is a plain string or a pulumi.Output (a bare template
    // literal would stringify the Output object instead of its value).
    const example = new aws.sagemaker.TrainingJob("example", {
        trainingJobName: "example",
        roleArn: exampleAwsIamRole.arn,
        algorithmSpecification: {
            trainingInputMode: "File",
            trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
        },
        // One channel per dataset; both use File mode over an S3 prefix.
        inputDataConfigs: [
            {
                channelName: "train",
                contentType: "text/csv",
                inputMode: "File",
                dataSource: {
                    s3DataSource: {
                        s3DataDistributionType: "FullyReplicated",
                        s3DataType: "S3Prefix",
                        s3Uri: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/train/`,
                    },
                },
            },
            {
                channelName: "validation",
                contentType: "text/csv",
                inputMode: "File",
                dataSource: {
                    s3DataSource: {
                        s3DataDistributionType: "FullyReplicated",
                        s3DataType: "S3Prefix",
                        s3Uri: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/validation/`,
                    },
                },
            },
        ],
        infraCheckConfig: {
            enableInfraCheck: true,
        },
        outputDataConfig: {
            s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
        },
        resourceConfig: {
            instanceType: "ml.m5.large",
            instanceCount: 1,
            volumeSizeInGb: 30,
        },
        sessionChainingConfig: {
            enableSessionTagChaining: true,
        },
        stoppingCondition: {
            maxRuntimeInSeconds: 3600,
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    # SageMaker AI training job that reads two CSV input channels from S3 and
    # enables the infrastructure check and session tag chaining options.
    #
    # Every S3 location uses pulumi.Output.concat, which resolves the bucket name
    # before joining the pieces. That keeps the paths correct whether the bucket
    # reference is a plain str or a pulumi.Output (an f-string would embed the
    # Output object's string form instead of its value).
    example = aws.sagemaker.TrainingJob("example",
        training_job_name="example",
        role_arn=example_aws_iam_role["arn"],
        algorithm_specification={
            "training_input_mode": "File",
            "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
        },
        # One channel per dataset; both use File mode over an S3 prefix.
        input_data_configs=[
            {
                "channel_name": "train",
                "content_type": "text/csv",
                "input_mode": "File",
                "data_source": {
                    "s3_data_source": {
                        "s3_data_distribution_type": "FullyReplicated",
                        "s3_data_type": "S3Prefix",
                        "s3_uri": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/train/"),
                    },
                },
            },
            {
                "channel_name": "validation",
                "content_type": "text/csv",
                "input_mode": "File",
                "data_source": {
                    "s3_data_source": {
                        "s3_data_distribution_type": "FullyReplicated",
                        "s3_data_type": "S3Prefix",
                        "s3_uri": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/validation/"),
                    },
                },
            },
        ],
        infra_check_config={
            "enable_infra_check": True,
        },
        output_data_config={
            "s3_output_path": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/output/"),
        },
        resource_config={
            "instance_type": "ml.m5.large",
            "instance_count": 1,
            "volume_size_in_gb": 30,
        },
        session_chaining_config={
            "enable_session_tag_chaining": True,
        },
        stopping_condition={
            "max_runtime_in_seconds": 3600,
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    // main declares a single SageMaker training job that reads two CSV input
    // channels from S3 and enables the infrastructure check and session tag
    // chaining options.
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// csvChannel builds a File-mode text/csv channel whose data source is
    		// the S3 prefix in the example bucket named after the channel.
    		csvChannel := func(channel string) *sagemaker.TrainingJobInputDataConfigArgs {
    			return &sagemaker.TrainingJobInputDataConfigArgs{
    				ChannelName: pulumi.String(channel),
    				ContentType: pulumi.String("text/csv"),
    				InputMode:   pulumi.String("File"),
    				DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceArgs{
    					S3DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceArgs{
    						S3DataDistributionType: pulumi.String("FullyReplicated"),
    						S3DataType:             pulumi.String("S3Prefix"),
    						S3Uri:                  pulumi.Sprintf("s3://%v/%v/", exampleAwsS3Bucket.Bucket, channel),
    					},
    				},
    			}
    		}
    
    		jobArgs := &sagemaker.TrainingJobArgs{
    			TrainingJobName: pulumi.String("example"),
    			RoleArn:         pulumi.Any(exampleAwsIamRole.Arn),
    			AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
    				TrainingInputMode: pulumi.String("File"),
    				TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
    			},
    			InputDataConfigs: sagemaker.TrainingJobInputDataConfigArray{
    				csvChannel("train"),
    				csvChannel("validation"),
    			},
    			InfraCheckConfig: &sagemaker.TrainingJobInfraCheckConfigArgs{
    				EnableInfraCheck: pulumi.Bool(true),
    			},
    			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
    				S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
    			},
    			ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
    				InstanceType:   pulumi.String("ml.m5.large"),
    				InstanceCount:  pulumi.Int(1),
    				VolumeSizeInGb: pulumi.Int(30),
    			},
    			SessionChainingConfig: &sagemaker.TrainingJobSessionChainingConfigArgs{
    				EnableSessionTagChaining: pulumi.Bool(true),
    			},
    			StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
    				MaxRuntimeInSeconds: pulumi.Int(3600),
    			},
    		}
    
    		// The resource handle is not needed afterwards; only the error is propagated.
    		_, err := sagemaker.NewTrainingJob(ctx, "example", jobArgs)
    		return err
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    // Declares a SageMaker AI training job that reads two CSV input channels
    // from S3 and enables the infrastructure check and session tag chaining
    // options.
    //
    // Every S3 location uses Output.Format, which resolves the bucket name
    // before formatting. That keeps the paths correct whether the bucket
    // reference is a plain string or an Output<string> (a bare interpolated
    // string would embed the Output object's string form instead of its value).
    return await Deployment.RunAsync(() => 
    {
        var example = new Aws.Sagemaker.TrainingJob("example", new()
        {
            TrainingJobName = "example",
            RoleArn = exampleAwsIamRole.Arn,
            AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
            {
                TrainingInputMode = "File",
                TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
            },
            // One channel per dataset; both use File mode over an S3 prefix.
            InputDataConfigs = new[]
            {
                new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
                {
                    ChannelName = "train",
                    ContentType = "text/csv",
                    InputMode = "File",
                    DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
                    {
                        S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
                        {
                            S3DataDistributionType = "FullyReplicated",
                            S3DataType = "S3Prefix",
                            S3Uri = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/train/"),
                        },
                    },
                },
                new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
                {
                    ChannelName = "validation",
                    ContentType = "text/csv",
                    InputMode = "File",
                    DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
                    {
                        S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
                        {
                            S3DataDistributionType = "FullyReplicated",
                            S3DataType = "S3Prefix",
                            S3Uri = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/validation/"),
                        },
                    },
                },
            },
            InfraCheckConfig = new Aws.Sagemaker.Inputs.TrainingJobInfraCheckConfigArgs
            {
                EnableInfraCheck = true,
            },
            OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
            {
                S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
            },
            ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
            {
                InstanceType = "ml.m5.large",
                InstanceCount = 1,
                VolumeSizeInGb = 30,
            },
            SessionChainingConfig = new Aws.Sagemaker.Inputs.TrainingJobSessionChainingConfigArgs
            {
                EnableSessionTagChaining = true,
            },
            StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
            {
                MaxRuntimeInSeconds = 3600,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.TrainingJob;
    import com.pulumi.aws.sagemaker.TrainingJobArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobInfraCheckConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobSessionChainingConfigArgs;
    import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    /**
     * Pulumi program declaring a SageMaker AI training job that reads two CSV
     * input channels from S3 and enables the infrastructure check and session
     * tag chaining options.
     */
    public class App {
        /** Hands the stack definition to the Pulumi runtime. */
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        /**
         * Declares the {@code example} training job.
         *
         * <p>Every S3 location is assembled with {@code Output.format} instead of
         * {@code String.format}: when the bucket name is a Pulumi {@code Output},
         * {@code String.format} would embed the Output object's string form
         * rather than the resolved bucket name. {@code Output.format} also accepts
         * plain strings, so either kind of bucket reference works.
         *
         * @param ctx the stack context passed in by {@code Pulumi.run}
         */
        public static void stack(Context ctx) {
            var example = new TrainingJob("example", TrainingJobArgs.builder()
                .trainingJobName("example")
                .roleArn(exampleAwsIamRole.arn())
                .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                    .trainingInputMode("File")
                    .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                    .build())
                // One channel per dataset; both use File mode over an S3 prefix.
                .inputDataConfigs(
                    TrainingJobInputDataConfigArgs.builder()
                        .channelName("train")
                        .contentType("text/csv")
                        .inputMode("File")
                        .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
                            .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                                .s3DataDistributionType("FullyReplicated")
                                .s3DataType("S3Prefix")
                                .s3Uri(Output.format("s3://%s/train/", exampleAwsS3Bucket.bucket()))
                                .build())
                            .build())
                        .build(),
                    TrainingJobInputDataConfigArgs.builder()
                        .channelName("validation")
                        .contentType("text/csv")
                        .inputMode("File")
                        .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
                            .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                                .s3DataDistributionType("FullyReplicated")
                                .s3DataType("S3Prefix")
                                .s3Uri(Output.format("s3://%s/validation/", exampleAwsS3Bucket.bucket()))
                                .build())
                            .build())
                        .build())
                .infraCheckConfig(TrainingJobInfraCheckConfigArgs.builder()
                    .enableInfraCheck(true)
                    .build())
                .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                    .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                    .build())
                .resourceConfig(TrainingJobResourceConfigArgs.builder()
                    .instanceType("ml.m5.large")
                    .instanceCount(1)
                    .volumeSizeInGb(30)
                    .build())
                .sessionChainingConfig(TrainingJobSessionChainingConfigArgs.builder()
                    .enableSessionTagChaining(true)
                    .build())
                .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                    .maxRuntimeInSeconds(3600)
                    .build())
                .build());
    
        }
    }
    
    # Declares one aws:sagemaker:TrainingJob named "example" with two S3 input
    # channels, an infrastructure check, and session tag chaining enabled.
    # exampleAwsIamRole, exampleAwsSagemakerPrebuiltEcrImage and
    # exampleAwsS3Bucket are referenced but not declared in this snippet.
    resources:
      example:
        type: aws:sagemaker:TrainingJob
        properties:
          trainingJobName: example
          roleArn: ${exampleAwsIamRole.arn}
          algorithmSpecification:
            trainingInputMode: File
            trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
          # Two channels ("train" and "validation"); both are text/csv, use
          # File input mode, and point at prefixes of the same bucket.
          inputDataConfigs:
            - channelName: train
              contentType: text/csv
              inputMode: File
              dataSource:
                s3DataSource:
                  s3DataDistributionType: FullyReplicated
                  s3DataType: S3Prefix
                  s3Uri: s3://${exampleAwsS3Bucket.bucket}/train/
            - channelName: validation
              contentType: text/csv
              inputMode: File
              dataSource:
                s3DataSource:
                  s3DataDistributionType: FullyReplicated
                  s3DataType: S3Prefix
                  s3Uri: s3://${exampleAwsS3Bucket.bucket}/validation/
          infraCheckConfig:
            enableInfraCheck: true
          # Output is written under the output/ prefix of the same bucket that
          # supplies the input channels.
          outputDataConfig:
            s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
          resourceConfig:
            instanceType: ml.m5.large
            instanceCount: 1
            volumeSizeInGb: 30
          sessionChainingConfig:
            enableSessionTagChaining: true
          stoppingCondition:
            # 3600 seconds, i.e. one hour.
            maxRuntimeInSeconds: 3600
    

    Create TrainingJob Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new TrainingJob(name: string, args: TrainingJobArgs, opts?: CustomResourceOptions);
    @overload
    def TrainingJob(resource_name: str,
                    args: TrainingJobArgs,
                    opts: Optional[ResourceOptions] = None)
    
    @overload
    def TrainingJob(resource_name: str,
                    opts: Optional[ResourceOptions] = None,
                    role_arn: Optional[str] = None,
                    training_job_name: Optional[str] = None,
                    output_data_config: Optional[TrainingJobOutputDataConfigArgs] = None,
                    retry_strategy: Optional[TrainingJobRetryStrategyArgs] = None,
                    delete_model_packages_on_destroy: Optional[bool] = None,
                    delete_vpc_enis_on_destroy: Optional[bool] = None,
                    enable_inter_container_traffic_encryption: Optional[bool] = None,
                    enable_managed_spot_training: Optional[bool] = None,
                    enable_network_isolation: Optional[bool] = None,
                    environment: Optional[Mapping[str, str]] = None,
                    experiment_config: Optional[TrainingJobExperimentConfigArgs] = None,
                    hyper_parameters: Optional[Mapping[str, str]] = None,
                    infra_check_config: Optional[TrainingJobInfraCheckConfigArgs] = None,
                    input_data_configs: Optional[Sequence[TrainingJobInputDataConfigArgs]] = None,
                    mlflow_config: Optional[TrainingJobMlflowConfigArgs] = None,
                    model_package_config: Optional[TrainingJobModelPackageConfigArgs] = None,
                    debug_rule_configurations: Optional[Sequence[TrainingJobDebugRuleConfigurationArgs]] = None,
                    algorithm_specification: Optional[TrainingJobAlgorithmSpecificationArgs] = None,
                    serverless_job_config: Optional[TrainingJobServerlessJobConfigArgs] = None,
                    region: Optional[str] = None,
                    remote_debug_config: Optional[TrainingJobRemoteDebugConfigArgs] = None,
                    resource_config: Optional[TrainingJobResourceConfigArgs] = None,
                    profiler_config: Optional[TrainingJobProfilerConfigArgs] = None,
                    debug_hook_config: Optional[TrainingJobDebugHookConfigArgs] = None,
                    profiler_rule_configurations: Optional[Sequence[TrainingJobProfilerRuleConfigurationArgs]] = None,
                    session_chaining_config: Optional[TrainingJobSessionChainingConfigArgs] = None,
                    stopping_condition: Optional[TrainingJobStoppingConditionArgs] = None,
                    tags: Optional[Mapping[str, str]] = None,
                    tensor_board_output_config: Optional[TrainingJobTensorBoardOutputConfigArgs] = None,
                    timeouts: Optional[TrainingJobTimeoutsArgs] = None,
                    checkpoint_config: Optional[TrainingJobCheckpointConfigArgs] = None,
                    vpc_config: Optional[TrainingJobVpcConfigArgs] = None)
    func NewTrainingJob(ctx *Context, name string, args TrainingJobArgs, opts ...ResourceOption) (*TrainingJob, error)
    public TrainingJob(string name, TrainingJobArgs args, CustomResourceOptions? opts = null)
    public TrainingJob(String name, TrainingJobArgs args)
    public TrainingJob(String name, TrainingJobArgs args, CustomResourceOptions options)
    
    type: aws:sagemaker:TrainingJob
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args TrainingJobArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args TrainingJobArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args TrainingJobArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args TrainingJobArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args TrainingJobArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    // Reference shape of a TrainingJob declaration in C#: every input property
    // is set to a type placeholder ("string", 0, false) rather than a usable
    // value, so the snippet shows names, nesting and value types only.
    // NOTE(review): because all properties are populated at once, this is
    // presumably not deployable as written; confirm which combinations the
    // service accepts before copying.
    var trainingJobResource = new Aws.Sagemaker.TrainingJob("trainingJobResource", new()
    {
        RoleArn = "string",
        TrainingJobName = "string",
        OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
        {
            S3OutputPath = "string",
            CompressionType = "string",
            KmsKeyId = "string",
        },
        RetryStrategy = new Aws.Sagemaker.Inputs.TrainingJobRetryStrategyArgs
        {
            MaximumRetryAttempts = 0,
        },
        DeleteModelPackagesOnDestroy = false,
        DeleteVpcEnisOnDestroy = false,
        EnableInterContainerTrafficEncryption = false,
        EnableManagedSpotTraining = false,
        EnableNetworkIsolation = false,
        // String-to-string map, written as a collection initializer.
        Environment = 
        {
            { "string", "string" },
        },
        ExperimentConfig = new Aws.Sagemaker.Inputs.TrainingJobExperimentConfigArgs
        {
            ExperimentName = "string",
            RunName = "string",
            TrialComponentDisplayName = "string",
            TrialName = "string",
        },
        HyperParameters = 
        {
            { "string", "string" },
        },
        InfraCheckConfig = new Aws.Sagemaker.Inputs.TrainingJobInfraCheckConfigArgs
        {
            EnableInfraCheck = false,
        },
        // List property: one element is shown; each element describes one channel.
        InputDataConfigs = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
            {
                ChannelName = "string",
                CompressionType = "string",
                ContentType = "string",
                // Both data source kinds are listed side by side for reference.
                DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
                {
                    FileSystemDataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs
                    {
                        DirectoryPath = "string",
                        FileSystemAccessMode = "string",
                        FileSystemId = "string",
                        FileSystemType = "string",
                    },
                    S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
                    {
                        S3DataType = "string",
                        S3Uri = "string",
                        AttributeNames = new[]
                        {
                            "string",
                        },
                        HubAccessConfig = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs
                        {
                            HubContentArn = "string",
                        },
                        InstanceGroupNames = new[]
                        {
                            "string",
                        },
                        ModelAccessConfig = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs
                        {
                            AcceptEula = false,
                        },
                        S3DataDistributionType = "string",
                    },
                },
                InputMode = "string",
                RecordWrapperType = "string",
                ShuffleConfig = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigShuffleConfigArgs
                {
                    Seed = 0,
                },
            },
        },
        MlflowConfig = new Aws.Sagemaker.Inputs.TrainingJobMlflowConfigArgs
        {
            MlflowResourceArn = "string",
            MlflowExperimentName = "string",
            MlflowRunName = "string",
        },
        ModelPackageConfig = new Aws.Sagemaker.Inputs.TrainingJobModelPackageConfigArgs
        {
            ModelPackageGroupArn = "string",
            SourceModelPackageArn = "string",
        },
        DebugRuleConfigurations = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobDebugRuleConfigurationArgs
            {
                RuleConfigurationName = "string",
                RuleEvaluatorImage = "string",
                InstanceType = "string",
                LocalPath = "string",
                RuleParameters = 
                {
                    { "string", "string" },
                },
                S3OutputPath = "string",
                VolumeSizeInGb = 0,
            },
        },
        // Both AlgorithmName and TrainingImage are listed here for reference.
        AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
        {
            AlgorithmName = "string",
            ContainerArguments = new[]
            {
                "string",
            },
            ContainerEntrypoints = new[]
            {
                "string",
            },
            EnableSagemakerMetricsTimeSeries = false,
            MetricDefinitions = new[]
            {
                new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs
                {
                    Name = "string",
                    Regex = "string",
                },
            },
            TrainingImage = "string",
            TrainingImageConfig = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationTrainingImageConfigArgs
            {
                TrainingRepositoryAccessMode = "string",
                TrainingRepositoryAuthConfig = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs
                {
                    TrainingRepositoryCredentialsProviderArn = "string",
                },
            },
            TrainingInputMode = "string",
        },
        ServerlessJobConfig = new Aws.Sagemaker.Inputs.TrainingJobServerlessJobConfigArgs
        {
            BaseModelArn = "string",
            JobType = "string",
            AcceptEula = false,
            CustomizationTechnique = "string",
            EvaluationType = "string",
            EvaluatorArn = "string",
            Peft = "string",
        },
        Region = "string",
        RemoteDebugConfig = new Aws.Sagemaker.Inputs.TrainingJobRemoteDebugConfigArgs
        {
            EnableRemoteDebug = false,
        },
        // Instance settings appear both at this level (InstanceType,
        // InstanceCount) and per entry in InstanceGroups.
        ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
        {
            InstanceCount = 0,
            InstanceGroups = new[]
            {
                new Aws.Sagemaker.Inputs.TrainingJobResourceConfigInstanceGroupArgs
                {
                    InstanceCount = 0,
                    InstanceGroupName = "string",
                    InstanceType = "string",
                },
            },
            InstancePlacementConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigInstancePlacementConfigArgs
            {
                EnableMultipleJobs = false,
                PlacementSpecifications = new[]
                {
                    new Aws.Sagemaker.Inputs.TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs
                    {
                        InstanceCount = 0,
                        UltraServerId = "string",
                    },
                },
            },
            InstanceType = "string",
            KeepAlivePeriodInSeconds = 0,
            TrainingPlanArn = "string",
            VolumeKmsKeyId = "string",
            VolumeSizeInGb = 0,
        },
        ProfilerConfig = new Aws.Sagemaker.Inputs.TrainingJobProfilerConfigArgs
        {
            DisableProfiler = false,
            ProfilingIntervalInMilliseconds = 0,
            ProfilingParameters = 
            {
                { "string", "string" },
            },
            S3OutputPath = "string",
        },
        DebugHookConfig = new Aws.Sagemaker.Inputs.TrainingJobDebugHookConfigArgs
        {
            S3OutputPath = "string",
            CollectionConfigurations = new[]
            {
                new Aws.Sagemaker.Inputs.TrainingJobDebugHookConfigCollectionConfigurationArgs
                {
                    CollectionName = "string",
                    CollectionParameters = 
                    {
                        { "string", "string" },
                    },
                },
            },
            HookParameters = 
            {
                { "string", "string" },
            },
            LocalPath = "string",
        },
        // Same field set as the DebugRuleConfigurations element above.
        ProfilerRuleConfigurations = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobProfilerRuleConfigurationArgs
            {
                RuleConfigurationName = "string",
                RuleEvaluatorImage = "string",
                InstanceType = "string",
                LocalPath = "string",
                RuleParameters = 
                {
                    { "string", "string" },
                },
                S3OutputPath = "string",
                VolumeSizeInGb = 0,
            },
        },
        SessionChainingConfig = new Aws.Sagemaker.Inputs.TrainingJobSessionChainingConfigArgs
        {
            EnableSessionTagChaining = false,
        },
        StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
        {
            MaxPendingTimeInSeconds = 0,
            MaxRuntimeInSeconds = 0,
            MaxWaitTimeInSeconds = 0,
        },
        Tags = 
        {
            { "string", "string" },
        },
        TensorBoardOutputConfig = new Aws.Sagemaker.Inputs.TrainingJobTensorBoardOutputConfigArgs
        {
            S3OutputPath = "string",
            LocalPath = "string",
        },
        // Timeout values are strings; the expected format is not shown in this snippet.
        Timeouts = new Aws.Sagemaker.Inputs.TrainingJobTimeoutsArgs
        {
            Create = "string",
            Delete = "string",
            Update = "string",
        },
        CheckpointConfig = new Aws.Sagemaker.Inputs.TrainingJobCheckpointConfigArgs
        {
            S3Uri = "string",
            LocalPath = "string",
        },
        VpcConfig = new Aws.Sagemaker.Inputs.TrainingJobVpcConfigArgs
        {
            SecurityGroupIds = new[]
            {
                "string",
            },
            Subnets = new[]
            {
                "string",
            },
        },
    });
    
    // Reference shape of a TrainingJob declaration in Go: every input property
    // is set to a type placeholder ("string", 0, false) wrapped in the matching
    // pulumi input constructor, so the snippet shows names, nesting and value
    // types only.
    // The snippet does not show error handling; check err before using example.
    // NOTE(review): because all properties are populated at once, this is
    // presumably not deployable as written; confirm which combinations the
    // service accepts before copying.
    example, err := sagemaker.NewTrainingJob(ctx, "trainingJobResource", &sagemaker.TrainingJobArgs{
    	RoleArn:         pulumi.String("string"),
    	TrainingJobName: pulumi.String("string"),
    	OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
    		S3OutputPath:    pulumi.String("string"),
    		CompressionType: pulumi.String("string"),
    		KmsKeyId:        pulumi.String("string"),
    	},
    	RetryStrategy: &sagemaker.TrainingJobRetryStrategyArgs{
    		MaximumRetryAttempts: pulumi.Int(0),
    	},
    	DeleteModelPackagesOnDestroy:          pulumi.Bool(false),
    	DeleteVpcEnisOnDestroy:                pulumi.Bool(false),
    	EnableInterContainerTrafficEncryption: pulumi.Bool(false),
    	EnableManagedSpotTraining:             pulumi.Bool(false),
    	EnableNetworkIsolation:                pulumi.Bool(false),
    	// String-to-string maps are expressed as pulumi.StringMap.
    	Environment: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    	ExperimentConfig: &sagemaker.TrainingJobExperimentConfigArgs{
    		ExperimentName:            pulumi.String("string"),
    		RunName:                   pulumi.String("string"),
    		TrialComponentDisplayName: pulumi.String("string"),
    		TrialName:                 pulumi.String("string"),
    	},
    	HyperParameters: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    	InfraCheckConfig: &sagemaker.TrainingJobInfraCheckConfigArgs{
    		EnableInfraCheck: pulumi.Bool(false),
    	},
    	// List property: one element is shown; each element describes one channel.
    	InputDataConfigs: sagemaker.TrainingJobInputDataConfigArray{
    		&sagemaker.TrainingJobInputDataConfigArgs{
    			ChannelName:     pulumi.String("string"),
    			CompressionType: pulumi.String("string"),
    			ContentType:     pulumi.String("string"),
    			// Both data source kinds are listed side by side for reference.
    			DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceArgs{
    				FileSystemDataSource: &sagemaker.TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs{
    					DirectoryPath:        pulumi.String("string"),
    					FileSystemAccessMode: pulumi.String("string"),
    					FileSystemId:         pulumi.String("string"),
    					FileSystemType:       pulumi.String("string"),
    				},
    				S3DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceArgs{
    					S3DataType: pulumi.String("string"),
    					S3Uri:      pulumi.String("string"),
    					AttributeNames: pulumi.StringArray{
    						pulumi.String("string"),
    					},
    					HubAccessConfig: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs{
    						HubContentArn: pulumi.String("string"),
    					},
    					InstanceGroupNames: pulumi.StringArray{
    						pulumi.String("string"),
    					},
    					ModelAccessConfig: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs{
    						AcceptEula: pulumi.Bool(false),
    					},
    					S3DataDistributionType: pulumi.String("string"),
    				},
    			},
    			InputMode:         pulumi.String("string"),
    			RecordWrapperType: pulumi.String("string"),
    			ShuffleConfig: &sagemaker.TrainingJobInputDataConfigShuffleConfigArgs{
    				Seed: pulumi.Int(0),
    			},
    		},
    	},
    	MlflowConfig: &sagemaker.TrainingJobMlflowConfigArgs{
    		MlflowResourceArn:    pulumi.String("string"),
    		MlflowExperimentName: pulumi.String("string"),
    		MlflowRunName:        pulumi.String("string"),
    	},
    	ModelPackageConfig: &sagemaker.TrainingJobModelPackageConfigArgs{
    		ModelPackageGroupArn:  pulumi.String("string"),
    		SourceModelPackageArn: pulumi.String("string"),
    	},
    	DebugRuleConfigurations: sagemaker.TrainingJobDebugRuleConfigurationArray{
    		&sagemaker.TrainingJobDebugRuleConfigurationArgs{
    			RuleConfigurationName: pulumi.String("string"),
    			RuleEvaluatorImage:    pulumi.String("string"),
    			InstanceType:          pulumi.String("string"),
    			LocalPath:             pulumi.String("string"),
    			RuleParameters: pulumi.StringMap{
    				"string": pulumi.String("string"),
    			},
    			S3OutputPath:   pulumi.String("string"),
    			VolumeSizeInGb: pulumi.Int(0),
    		},
    	},
    	// Both AlgorithmName and TrainingImage are listed here for reference.
    	AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
    		AlgorithmName: pulumi.String("string"),
    		ContainerArguments: pulumi.StringArray{
    			pulumi.String("string"),
    		},
    		ContainerEntrypoints: pulumi.StringArray{
    			pulumi.String("string"),
    		},
    		EnableSagemakerMetricsTimeSeries: pulumi.Bool(false),
    		MetricDefinitions: sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArray{
    			&sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArgs{
    				Name:  pulumi.String("string"),
    				Regex: pulumi.String("string"),
    			},
    		},
    		TrainingImage: pulumi.String("string"),
    		TrainingImageConfig: &sagemaker.TrainingJobAlgorithmSpecificationTrainingImageConfigArgs{
    			TrainingRepositoryAccessMode: pulumi.String("string"),
    			TrainingRepositoryAuthConfig: &sagemaker.TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs{
    				TrainingRepositoryCredentialsProviderArn: pulumi.String("string"),
    			},
    		},
    		TrainingInputMode: pulumi.String("string"),
    	},
    	ServerlessJobConfig: &sagemaker.TrainingJobServerlessJobConfigArgs{
    		BaseModelArn:           pulumi.String("string"),
    		JobType:                pulumi.String("string"),
    		AcceptEula:             pulumi.Bool(false),
    		CustomizationTechnique: pulumi.String("string"),
    		EvaluationType:         pulumi.String("string"),
    		EvaluatorArn:           pulumi.String("string"),
    		Peft:                   pulumi.String("string"),
    	},
    	Region: pulumi.String("string"),
    	RemoteDebugConfig: &sagemaker.TrainingJobRemoteDebugConfigArgs{
    		EnableRemoteDebug: pulumi.Bool(false),
    	},
    	// Instance settings appear both at this level (InstanceType,
    	// InstanceCount) and per entry in InstanceGroups.
    	ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
    		InstanceCount: pulumi.Int(0),
    		InstanceGroups: sagemaker.TrainingJobResourceConfigInstanceGroupArray{
    			&sagemaker.TrainingJobResourceConfigInstanceGroupArgs{
    				InstanceCount:     pulumi.Int(0),
    				InstanceGroupName: pulumi.String("string"),
    				InstanceType:      pulumi.String("string"),
    			},
    		},
    		InstancePlacementConfig: &sagemaker.TrainingJobResourceConfigInstancePlacementConfigArgs{
    			EnableMultipleJobs: pulumi.Bool(false),
    			PlacementSpecifications: sagemaker.TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArray{
    				&sagemaker.TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs{
    					InstanceCount: pulumi.Int(0),
    					UltraServerId: pulumi.String("string"),
    				},
    			},
    		},
    		InstanceType:             pulumi.String("string"),
    		KeepAlivePeriodInSeconds: pulumi.Int(0),
    		TrainingPlanArn:          pulumi.String("string"),
    		VolumeKmsKeyId:           pulumi.String("string"),
    		VolumeSizeInGb:           pulumi.Int(0),
    	},
    	ProfilerConfig: &sagemaker.TrainingJobProfilerConfigArgs{
    		DisableProfiler:                 pulumi.Bool(false),
    		ProfilingIntervalInMilliseconds: pulumi.Int(0),
    		ProfilingParameters: pulumi.StringMap{
    			"string": pulumi.String("string"),
    		},
    		S3OutputPath: pulumi.String("string"),
    	},
    	DebugHookConfig: &sagemaker.TrainingJobDebugHookConfigArgs{
    		S3OutputPath: pulumi.String("string"),
    		CollectionConfigurations: sagemaker.TrainingJobDebugHookConfigCollectionConfigurationArray{
    			&sagemaker.TrainingJobDebugHookConfigCollectionConfigurationArgs{
    				CollectionName: pulumi.String("string"),
    				CollectionParameters: pulumi.StringMap{
    					"string": pulumi.String("string"),
    				},
    			},
    		},
    		HookParameters: pulumi.StringMap{
    			"string": pulumi.String("string"),
    		},
    		LocalPath: pulumi.String("string"),
    	},
    	// Same field set as the DebugRuleConfigurations element above.
    	ProfilerRuleConfigurations: sagemaker.TrainingJobProfilerRuleConfigurationArray{
    		&sagemaker.TrainingJobProfilerRuleConfigurationArgs{
    			RuleConfigurationName: pulumi.String("string"),
    			RuleEvaluatorImage:    pulumi.String("string"),
    			InstanceType:          pulumi.String("string"),
    			LocalPath:             pulumi.String("string"),
    			RuleParameters: pulumi.StringMap{
    				"string": pulumi.String("string"),
    			},
    			S3OutputPath:   pulumi.String("string"),
    			VolumeSizeInGb: pulumi.Int(0),
    		},
    	},
    	SessionChainingConfig: &sagemaker.TrainingJobSessionChainingConfigArgs{
    		EnableSessionTagChaining: pulumi.Bool(false),
    	},
    	StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
    		MaxPendingTimeInSeconds: pulumi.Int(0),
    		MaxRuntimeInSeconds:     pulumi.Int(0),
    		MaxWaitTimeInSeconds:    pulumi.Int(0),
    	},
    	Tags: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    	TensorBoardOutputConfig: &sagemaker.TrainingJobTensorBoardOutputConfigArgs{
    		S3OutputPath: pulumi.String("string"),
    		LocalPath:    pulumi.String("string"),
    	},
    	// Timeout values are strings; the expected format is not shown in this snippet.
    	Timeouts: &sagemaker.TrainingJobTimeoutsArgs{
    		Create: pulumi.String("string"),
    		Delete: pulumi.String("string"),
    		Update: pulumi.String("string"),
    	},
    	CheckpointConfig: &sagemaker.TrainingJobCheckpointConfigArgs{
    		S3Uri:     pulumi.String("string"),
    		LocalPath: pulumi.String("string"),
    	},
    	VpcConfig: &sagemaker.TrainingJobVpcConfigArgs{
    		SecurityGroupIds: pulumi.StringArray{
    			pulumi.String("string"),
    		},
    		Subnets: pulumi.StringArray{
    			pulumi.String("string"),
    		},
    	},
    })
    
    var trainingJobResource = new TrainingJob("trainingJobResource", TrainingJobArgs.builder()
        .roleArn("string")
        .trainingJobName("string")
        .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
            .s3OutputPath("string")
            .compressionType("string")
            .kmsKeyId("string")
            .build())
        .retryStrategy(TrainingJobRetryStrategyArgs.builder()
            .maximumRetryAttempts(0)
            .build())
        .deleteModelPackagesOnDestroy(false)
        .deleteVpcEnisOnDestroy(false)
        .enableInterContainerTrafficEncryption(false)
        .enableManagedSpotTraining(false)
        .enableNetworkIsolation(false)
        .environment(Map.of("string", "string"))
        .experimentConfig(TrainingJobExperimentConfigArgs.builder()
            .experimentName("string")
            .runName("string")
            .trialComponentDisplayName("string")
            .trialName("string")
            .build())
        .hyperParameters(Map.of("string", "string"))
        .infraCheckConfig(TrainingJobInfraCheckConfigArgs.builder()
            .enableInfraCheck(false)
            .build())
        .inputDataConfigs(TrainingJobInputDataConfigArgs.builder()
            .channelName("string")
            .compressionType("string")
            .contentType("string")
            .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
                .fileSystemDataSource(TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs.builder()
                    .directoryPath("string")
                    .fileSystemAccessMode("string")
                    .fileSystemId("string")
                    .fileSystemType("string")
                    .build())
                .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                    .s3DataType("string")
                    .s3Uri("string")
                    .attributeNames("string")
                    .hubAccessConfig(TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs.builder()
                        .hubContentArn("string")
                        .build())
                    .instanceGroupNames("string")
                    .modelAccessConfig(TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs.builder()
                        .acceptEula(false)
                        .build())
                    .s3DataDistributionType("string")
                    .build())
                .build())
            .inputMode("string")
            .recordWrapperType("string")
            .shuffleConfig(TrainingJobInputDataConfigShuffleConfigArgs.builder()
                .seed(0)
                .build())
            .build())
        .mlflowConfig(TrainingJobMlflowConfigArgs.builder()
            .mlflowResourceArn("string")
            .mlflowExperimentName("string")
            .mlflowRunName("string")
            .build())
        .modelPackageConfig(TrainingJobModelPackageConfigArgs.builder()
            .modelPackageGroupArn("string")
            .sourceModelPackageArn("string")
            .build())
        .debugRuleConfigurations(TrainingJobDebugRuleConfigurationArgs.builder()
            .ruleConfigurationName("string")
            .ruleEvaluatorImage("string")
            .instanceType("string")
            .localPath("string")
            .ruleParameters(Map.of("string", "string"))
            .s3OutputPath("string")
            .volumeSizeInGb(0)
            .build())
        .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
            .algorithmName("string")
            .containerArguments("string")
            .containerEntrypoints("string")
            .enableSagemakerMetricsTimeSeries(false)
            .metricDefinitions(TrainingJobAlgorithmSpecificationMetricDefinitionArgs.builder()
                .name("string")
                .regex("string")
                .build())
            .trainingImage("string")
            .trainingImageConfig(TrainingJobAlgorithmSpecificationTrainingImageConfigArgs.builder()
                .trainingRepositoryAccessMode("string")
                .trainingRepositoryAuthConfig(TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs.builder()
                    .trainingRepositoryCredentialsProviderArn("string")
                    .build())
                .build())
            .trainingInputMode("string")
            .build())
        .serverlessJobConfig(TrainingJobServerlessJobConfigArgs.builder()
            .baseModelArn("string")
            .jobType("string")
            .acceptEula(false)
            .customizationTechnique("string")
            .evaluationType("string")
            .evaluatorArn("string")
            .peft("string")
            .build())
        .region("string")
        .remoteDebugConfig(TrainingJobRemoteDebugConfigArgs.builder()
            .enableRemoteDebug(false)
            .build())
        .resourceConfig(TrainingJobResourceConfigArgs.builder()
            .instanceCount(0)
            .instanceGroups(TrainingJobResourceConfigInstanceGroupArgs.builder()
                .instanceCount(0)
                .instanceGroupName("string")
                .instanceType("string")
                .build())
            .instancePlacementConfig(TrainingJobResourceConfigInstancePlacementConfigArgs.builder()
                .enableMultipleJobs(false)
                .placementSpecifications(TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs.builder()
                    .instanceCount(0)
                    .ultraServerId("string")
                    .build())
                .build())
            .instanceType("string")
            .keepAlivePeriodInSeconds(0)
            .trainingPlanArn("string")
            .volumeKmsKeyId("string")
            .volumeSizeInGb(0)
            .build())
        .profilerConfig(TrainingJobProfilerConfigArgs.builder()
            .disableProfiler(false)
            .profilingIntervalInMilliseconds(0)
            .profilingParameters(Map.of("string", "string"))
            .s3OutputPath("string")
            .build())
        .debugHookConfig(TrainingJobDebugHookConfigArgs.builder()
            .s3OutputPath("string")
            .collectionConfigurations(TrainingJobDebugHookConfigCollectionConfigurationArgs.builder()
                .collectionName("string")
                .collectionParameters(Map.of("string", "string"))
                .build())
            .hookParameters(Map.of("string", "string"))
            .localPath("string")
            .build())
        .profilerRuleConfigurations(TrainingJobProfilerRuleConfigurationArgs.builder()
            .ruleConfigurationName("string")
            .ruleEvaluatorImage("string")
            .instanceType("string")
            .localPath("string")
            .ruleParameters(Map.of("string", "string"))
            .s3OutputPath("string")
            .volumeSizeInGb(0)
            .build())
        .sessionChainingConfig(TrainingJobSessionChainingConfigArgs.builder()
            .enableSessionTagChaining(false)
            .build())
        .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
            .maxPendingTimeInSeconds(0)
            .maxRuntimeInSeconds(0)
            .maxWaitTimeInSeconds(0)
            .build())
        .tags(Map.of("string", "string"))
        .tensorBoardOutputConfig(TrainingJobTensorBoardOutputConfigArgs.builder()
            .s3OutputPath("string")
            .localPath("string")
            .build())
        .timeouts(TrainingJobTimeoutsArgs.builder()
            .create("string")
            .delete("string")
            .update("string")
            .build())
        .checkpointConfig(TrainingJobCheckpointConfigArgs.builder()
            .s3Uri("string")
            .localPath("string")
            .build())
        .vpcConfig(TrainingJobVpcConfigArgs.builder()
            .securityGroupIds("string")
            .subnets("string")
            .build())
        .build());
    
    training_job_resource = aws.sagemaker.TrainingJob("trainingJobResource",
        role_arn="string",
        training_job_name="string",
        output_data_config={
            "s3_output_path": "string",
            "compression_type": "string",
            "kms_key_id": "string",
        },
        retry_strategy={
            "maximum_retry_attempts": 0,
        },
        delete_model_packages_on_destroy=False,
        delete_vpc_enis_on_destroy=False,
        enable_inter_container_traffic_encryption=False,
        enable_managed_spot_training=False,
        enable_network_isolation=False,
        environment={
            "string": "string",
        },
        experiment_config={
            "experiment_name": "string",
            "run_name": "string",
            "trial_component_display_name": "string",
            "trial_name": "string",
        },
        hyper_parameters={
            "string": "string",
        },
        infra_check_config={
            "enable_infra_check": False,
        },
        input_data_configs=[{
            "channel_name": "string",
            "compression_type": "string",
            "content_type": "string",
            "data_source": {
                "file_system_data_source": {
                    "directory_path": "string",
                    "file_system_access_mode": "string",
                    "file_system_id": "string",
                    "file_system_type": "string",
                },
                "s3_data_source": {
                    "s3_data_type": "string",
                    "s3_uri": "string",
                    "attribute_names": ["string"],
                    "hub_access_config": {
                        "hub_content_arn": "string",
                    },
                    "instance_group_names": ["string"],
                    "model_access_config": {
                        "accept_eula": False,
                    },
                    "s3_data_distribution_type": "string",
                },
            },
            "input_mode": "string",
            "record_wrapper_type": "string",
            "shuffle_config": {
                "seed": 0,
            },
        }],
        mlflow_config={
            "mlflow_resource_arn": "string",
            "mlflow_experiment_name": "string",
            "mlflow_run_name": "string",
        },
        model_package_config={
            "model_package_group_arn": "string",
            "source_model_package_arn": "string",
        },
        debug_rule_configurations=[{
            "rule_configuration_name": "string",
            "rule_evaluator_image": "string",
            "instance_type": "string",
            "local_path": "string",
            "rule_parameters": {
                "string": "string",
            },
            "s3_output_path": "string",
            "volume_size_in_gb": 0,
        }],
        algorithm_specification={
            "algorithm_name": "string",
            "container_arguments": ["string"],
            "container_entrypoints": ["string"],
            "enable_sagemaker_metrics_time_series": False,
            "metric_definitions": [{
                "name": "string",
                "regex": "string",
            }],
            "training_image": "string",
            "training_image_config": {
                "training_repository_access_mode": "string",
                "training_repository_auth_config": {
                    "training_repository_credentials_provider_arn": "string",
                },
            },
            "training_input_mode": "string",
        },
        serverless_job_config={
            "base_model_arn": "string",
            "job_type": "string",
            "accept_eula": False,
            "customization_technique": "string",
            "evaluation_type": "string",
            "evaluator_arn": "string",
            "peft": "string",
        },
        region="string",
        remote_debug_config={
            "enable_remote_debug": False,
        },
        resource_config={
            "instance_count": 0,
            "instance_groups": [{
                "instance_count": 0,
                "instance_group_name": "string",
                "instance_type": "string",
            }],
            "instance_placement_config": {
                "enable_multiple_jobs": False,
                "placement_specifications": [{
                    "instance_count": 0,
                    "ultra_server_id": "string",
                }],
            },
            "instance_type": "string",
            "keep_alive_period_in_seconds": 0,
            "training_plan_arn": "string",
            "volume_kms_key_id": "string",
            "volume_size_in_gb": 0,
        },
        profiler_config={
            "disable_profiler": False,
            "profiling_interval_in_milliseconds": 0,
            "profiling_parameters": {
                "string": "string",
            },
            "s3_output_path": "string",
        },
        debug_hook_config={
            "s3_output_path": "string",
            "collection_configurations": [{
                "collection_name": "string",
                "collection_parameters": {
                    "string": "string",
                },
            }],
            "hook_parameters": {
                "string": "string",
            },
            "local_path": "string",
        },
        profiler_rule_configurations=[{
            "rule_configuration_name": "string",
            "rule_evaluator_image": "string",
            "instance_type": "string",
            "local_path": "string",
            "rule_parameters": {
                "string": "string",
            },
            "s3_output_path": "string",
            "volume_size_in_gb": 0,
        }],
        session_chaining_config={
            "enable_session_tag_chaining": False,
        },
        stopping_condition={
            "max_pending_time_in_seconds": 0,
            "max_runtime_in_seconds": 0,
            "max_wait_time_in_seconds": 0,
        },
        tags={
            "string": "string",
        },
        tensor_board_output_config={
            "s3_output_path": "string",
            "local_path": "string",
        },
        timeouts={
            "create": "string",
            "delete": "string",
            "update": "string",
        },
        checkpoint_config={
            "s3_uri": "string",
            "local_path": "string",
        },
        vpc_config={
            "security_group_ids": ["string"],
            "subnets": ["string"],
        })
    
    const trainingJobResource = new aws.sagemaker.TrainingJob("trainingJobResource", {
        roleArn: "string",
        trainingJobName: "string",
        outputDataConfig: {
            s3OutputPath: "string",
            compressionType: "string",
            kmsKeyId: "string",
        },
        retryStrategy: {
            maximumRetryAttempts: 0,
        },
        deleteModelPackagesOnDestroy: false,
        deleteVpcEnisOnDestroy: false,
        enableInterContainerTrafficEncryption: false,
        enableManagedSpotTraining: false,
        enableNetworkIsolation: false,
        environment: {
            string: "string",
        },
        experimentConfig: {
            experimentName: "string",
            runName: "string",
            trialComponentDisplayName: "string",
            trialName: "string",
        },
        hyperParameters: {
            string: "string",
        },
        infraCheckConfig: {
            enableInfraCheck: false,
        },
        inputDataConfigs: [{
            channelName: "string",
            compressionType: "string",
            contentType: "string",
            dataSource: {
                fileSystemDataSource: {
                    directoryPath: "string",
                    fileSystemAccessMode: "string",
                    fileSystemId: "string",
                    fileSystemType: "string",
                },
                s3DataSource: {
                    s3DataType: "string",
                    s3Uri: "string",
                    attributeNames: ["string"],
                    hubAccessConfig: {
                        hubContentArn: "string",
                    },
                    instanceGroupNames: ["string"],
                    modelAccessConfig: {
                        acceptEula: false,
                    },
                    s3DataDistributionType: "string",
                },
            },
            inputMode: "string",
            recordWrapperType: "string",
            shuffleConfig: {
                seed: 0,
            },
        }],
        mlflowConfig: {
            mlflowResourceArn: "string",
            mlflowExperimentName: "string",
            mlflowRunName: "string",
        },
        modelPackageConfig: {
            modelPackageGroupArn: "string",
            sourceModelPackageArn: "string",
        },
        debugRuleConfigurations: [{
            ruleConfigurationName: "string",
            ruleEvaluatorImage: "string",
            instanceType: "string",
            localPath: "string",
            ruleParameters: {
                string: "string",
            },
            s3OutputPath: "string",
            volumeSizeInGb: 0,
        }],
        algorithmSpecification: {
            algorithmName: "string",
            containerArguments: ["string"],
            containerEntrypoints: ["string"],
            enableSagemakerMetricsTimeSeries: false,
            metricDefinitions: [{
                name: "string",
                regex: "string",
            }],
            trainingImage: "string",
            trainingImageConfig: {
                trainingRepositoryAccessMode: "string",
                trainingRepositoryAuthConfig: {
                    trainingRepositoryCredentialsProviderArn: "string",
                },
            },
            trainingInputMode: "string",
        },
        serverlessJobConfig: {
            baseModelArn: "string",
            jobType: "string",
            acceptEula: false,
            customizationTechnique: "string",
            evaluationType: "string",
            evaluatorArn: "string",
            peft: "string",
        },
        region: "string",
        remoteDebugConfig: {
            enableRemoteDebug: false,
        },
        resourceConfig: {
            instanceCount: 0,
            instanceGroups: [{
                instanceCount: 0,
                instanceGroupName: "string",
                instanceType: "string",
            }],
            instancePlacementConfig: {
                enableMultipleJobs: false,
                placementSpecifications: [{
                    instanceCount: 0,
                    ultraServerId: "string",
                }],
            },
            instanceType: "string",
            keepAlivePeriodInSeconds: 0,
            trainingPlanArn: "string",
            volumeKmsKeyId: "string",
            volumeSizeInGb: 0,
        },
        profilerConfig: {
            disableProfiler: false,
            profilingIntervalInMilliseconds: 0,
            profilingParameters: {
                string: "string",
            },
            s3OutputPath: "string",
        },
        debugHookConfig: {
            s3OutputPath: "string",
            collectionConfigurations: [{
                collectionName: "string",
                collectionParameters: {
                    string: "string",
                },
            }],
            hookParameters: {
                string: "string",
            },
            localPath: "string",
        },
        profilerRuleConfigurations: [{
            ruleConfigurationName: "string",
            ruleEvaluatorImage: "string",
            instanceType: "string",
            localPath: "string",
            ruleParameters: {
                string: "string",
            },
            s3OutputPath: "string",
            volumeSizeInGb: 0,
        }],
        sessionChainingConfig: {
            enableSessionTagChaining: false,
        },
        stoppingCondition: {
            maxPendingTimeInSeconds: 0,
            maxRuntimeInSeconds: 0,
            maxWaitTimeInSeconds: 0,
        },
        tags: {
            string: "string",
        },
        tensorBoardOutputConfig: {
            s3OutputPath: "string",
            localPath: "string",
        },
        timeouts: {
            create: "string",
            "delete": "string",
            update: "string",
        },
        checkpointConfig: {
            s3Uri: "string",
            localPath: "string",
        },
        vpcConfig: {
            securityGroupIds: ["string"],
            subnets: ["string"],
        },
    });
    
    type: aws:sagemaker:TrainingJob
    properties:
        algorithmSpecification:
            algorithmName: string
            containerArguments:
                - string
            containerEntrypoints:
                - string
            enableSagemakerMetricsTimeSeries: false
            metricDefinitions:
                - name: string
                  regex: string
            trainingImage: string
            trainingImageConfig:
                trainingRepositoryAccessMode: string
                trainingRepositoryAuthConfig:
                    trainingRepositoryCredentialsProviderArn: string
            trainingInputMode: string
        checkpointConfig:
            localPath: string
            s3Uri: string
        debugHookConfig:
            collectionConfigurations:
                - collectionName: string
                  collectionParameters:
                    string: string
            hookParameters:
                string: string
            localPath: string
            s3OutputPath: string
        debugRuleConfigurations:
            - instanceType: string
              localPath: string
              ruleConfigurationName: string
              ruleEvaluatorImage: string
              ruleParameters:
                string: string
              s3OutputPath: string
              volumeSizeInGb: 0
        deleteModelPackagesOnDestroy: false
        deleteVpcEnisOnDestroy: false
        enableInterContainerTrafficEncryption: false
        enableManagedSpotTraining: false
        enableNetworkIsolation: false
        environment:
            string: string
        experimentConfig:
            experimentName: string
            runName: string
            trialComponentDisplayName: string
            trialName: string
        hyperParameters:
            string: string
        infraCheckConfig:
            enableInfraCheck: false
        inputDataConfigs:
            - channelName: string
              compressionType: string
              contentType: string
              dataSource:
                fileSystemDataSource:
                    directoryPath: string
                    fileSystemAccessMode: string
                    fileSystemId: string
                    fileSystemType: string
                s3DataSource:
                    attributeNames:
                        - string
                    hubAccessConfig:
                        hubContentArn: string
                    instanceGroupNames:
                        - string
                    modelAccessConfig:
                        acceptEula: false
                    s3DataDistributionType: string
                    s3DataType: string
                    s3Uri: string
              inputMode: string
              recordWrapperType: string
              shuffleConfig:
                seed: 0
        mlflowConfig:
            mlflowExperimentName: string
            mlflowResourceArn: string
            mlflowRunName: string
        modelPackageConfig:
            modelPackageGroupArn: string
            sourceModelPackageArn: string
        outputDataConfig:
            compressionType: string
            kmsKeyId: string
            s3OutputPath: string
        profilerConfig:
            disableProfiler: false
            profilingIntervalInMilliseconds: 0
            profilingParameters:
                string: string
            s3OutputPath: string
        profilerRuleConfigurations:
            - instanceType: string
              localPath: string
              ruleConfigurationName: string
              ruleEvaluatorImage: string
              ruleParameters:
                string: string
              s3OutputPath: string
              volumeSizeInGb: 0
        region: string
        remoteDebugConfig:
            enableRemoteDebug: false
        resourceConfig:
            instanceCount: 0
            instanceGroups:
                - instanceCount: 0
                  instanceGroupName: string
                  instanceType: string
            instancePlacementConfig:
                enableMultipleJobs: false
                placementSpecifications:
                    - instanceCount: 0
                      ultraServerId: string
            instanceType: string
            keepAlivePeriodInSeconds: 0
            trainingPlanArn: string
            volumeKmsKeyId: string
            volumeSizeInGb: 0
        retryStrategy:
            maximumRetryAttempts: 0
        roleArn: string
        serverlessJobConfig:
            acceptEula: false
            baseModelArn: string
            customizationTechnique: string
            evaluationType: string
            evaluatorArn: string
            jobType: string
            peft: string
        sessionChainingConfig:
            enableSessionTagChaining: false
        stoppingCondition:
            maxPendingTimeInSeconds: 0
            maxRuntimeInSeconds: 0
            maxWaitTimeInSeconds: 0
        tags:
            string: string
        tensorBoardOutputConfig:
            localPath: string
            s3OutputPath: string
        timeouts:
            create: string
            delete: string
            update: string
        trainingJobName: string
        vpcConfig:
            securityGroupIds:
                - string
            subnets:
                - string
    

    TrainingJob Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

    The TrainingJob resource accepts the following input properties:

    RoleArn string
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    TrainingJobName string
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    AlgorithmSpecification TrainingJobAlgorithmSpecification
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    CheckpointConfig TrainingJobCheckpointConfig
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    DebugHookConfig TrainingJobDebugHookConfig
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    DebugRuleConfigurations List<TrainingJobDebugRuleConfiguration>
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    DeleteModelPackagesOnDestroy bool
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    DeleteVpcEnisOnDestroy bool
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    EnableInterContainerTrafficEncryption bool
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    EnableManagedSpotTraining bool
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    EnableNetworkIsolation bool
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    Environment Dictionary<string, string>
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    ExperimentConfig TrainingJobExperimentConfig
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    HyperParameters Dictionary<string, string>
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    InfraCheckConfig TrainingJobInfraCheckConfig
    Infrastructure health check configuration. See infraCheckConfig below.
    InputDataConfigs List<TrainingJobInputDataConfig>
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    MlflowConfig TrainingJobMlflowConfig
    MLflow integration configuration. See mlflowConfig below.
    ModelPackageConfig TrainingJobModelPackageConfig
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    OutputDataConfig TrainingJobOutputDataConfig

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    ProfilerConfig TrainingJobProfilerConfig
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    ProfilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    RemoteDebugConfig TrainingJobRemoteDebugConfig
    Configuration for remote debugging. See remoteDebugConfig below.
    ResourceConfig TrainingJobResourceConfig
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    RetryStrategy TrainingJobRetryStrategy
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    ServerlessJobConfig TrainingJobServerlessJobConfig
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    SessionChainingConfig TrainingJobSessionChainingConfig
    Configuration for session tag chaining. See sessionChainingConfig below.
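    StoppingCondition TrainingJobStoppingCondition
    Time limits for the training job: maximum pending time, maximum runtime, and maximum wait time, each in seconds. See stoppingCondition below.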
    Tags Dictionary<string, string>
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TensorBoardOutputConfig TrainingJobTensorBoardOutputConfig
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    Timeouts TrainingJobTimeouts
    Timeouts for the create, update, and delete operations on this resource.
    VpcConfig TrainingJobVpcConfig
    VPC configuration for the training job. See vpcConfig below.
    RoleArn string
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    TrainingJobName string
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    AlgorithmSpecification TrainingJobAlgorithmSpecificationArgs
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    CheckpointConfig TrainingJobCheckpointConfigArgs
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    DebugHookConfig TrainingJobDebugHookConfigArgs
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    DebugRuleConfigurations []TrainingJobDebugRuleConfigurationArgs
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    DeleteModelPackagesOnDestroy bool
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    DeleteVpcEnisOnDestroy bool
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    EnableInterContainerTrafficEncryption bool
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    EnableManagedSpotTraining bool
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    EnableNetworkIsolation bool
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    Environment map[string]string
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    ExperimentConfig TrainingJobExperimentConfigArgs
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    HyperParameters map[string]string
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    InfraCheckConfig TrainingJobInfraCheckConfigArgs
    Infrastructure health check configuration. See infraCheckConfig below.
    InputDataConfigs []TrainingJobInputDataConfigArgs
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    MlflowConfig TrainingJobMlflowConfigArgs
    MLflow integration configuration. See mlflowConfig below.
    ModelPackageConfig TrainingJobModelPackageConfigArgs
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    OutputDataConfig TrainingJobOutputDataConfigArgs

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    ProfilerConfig TrainingJobProfilerConfigArgs
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    ProfilerRuleConfigurations []TrainingJobProfilerRuleConfigurationArgs
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    RemoteDebugConfig TrainingJobRemoteDebugConfigArgs
    Configuration for remote debugging. See remoteDebugConfig below.
    ResourceConfig TrainingJobResourceConfigArgs
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    RetryStrategy TrainingJobRetryStrategyArgs
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    ServerlessJobConfig TrainingJobServerlessJobConfigArgs
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    SessionChainingConfig TrainingJobSessionChainingConfigArgs
    Configuration for session tag chaining. See sessionChainingConfig below.
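    StoppingCondition TrainingJobStoppingConditionArgs
    Time limits for the training job: maximum pending time, maximum runtime, and maximum wait time, each in seconds. See stoppingCondition below.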
    Tags map[string]string
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TensorBoardOutputConfig TrainingJobTensorBoardOutputConfigArgs
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    Timeouts TrainingJobTimeoutsArgs
    Timeouts for the create, update, and delete operations on this resource.
    VpcConfig TrainingJobVpcConfigArgs
    VPC configuration for the training job. See vpcConfig below.
    roleArn String
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    trainingJobName String
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    algorithmSpecification TrainingJobAlgorithmSpecification
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    checkpointConfig TrainingJobCheckpointConfig
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debugHookConfig TrainingJobDebugHookConfig
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debugRuleConfigurations List<TrainingJobDebugRuleConfiguration>
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    deleteModelPackagesOnDestroy Boolean
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    deleteVpcEnisOnDestroy Boolean
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enableInterContainerTrafficEncryption Boolean
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enableManagedSpotTraining Boolean
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enableNetworkIsolation Boolean
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment Map<String,String>
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experimentConfig TrainingJobExperimentConfig
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyperParameters Map<String,String>
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infraCheckConfig TrainingJobInfraCheckConfig
    Infrastructure health check configuration. See infraCheckConfig below.
    inputDataConfigs List<TrainingJobInputDataConfig>
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflowConfig TrainingJobMlflowConfig
    MLflow integration configuration. See mlflowConfig below.
    modelPackageConfig TrainingJobModelPackageConfig
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    outputDataConfig TrainingJobOutputDataConfig

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profilerConfig TrainingJobProfilerConfig
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remoteDebugConfig TrainingJobRemoteDebugConfig
    Configuration for remote debugging. See remoteDebugConfig below.
    resourceConfig TrainingJobResourceConfig
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retryStrategy TrainingJobRetryStrategy
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    serverlessJobConfig TrainingJobServerlessJobConfig
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    sessionChainingConfig TrainingJobSessionChainingConfig
    Configuration for session tag chaining. See sessionChainingConfig below.
    stoppingCondition TrainingJobStoppingCondition
    Limit on how long the training job can run. See stoppingCondition below.
    tags Map<String,String>
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts TrainingJobTimeouts
    vpcConfig TrainingJobVpcConfig
    VPC configuration for the training job. See vpcConfig below.
    roleArn string
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    trainingJobName string
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    algorithmSpecification TrainingJobAlgorithmSpecification
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    checkpointConfig TrainingJobCheckpointConfig
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debugHookConfig TrainingJobDebugHookConfig
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debugRuleConfigurations TrainingJobDebugRuleConfiguration[]
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    deleteModelPackagesOnDestroy boolean
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    deleteVpcEnisOnDestroy boolean
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enableInterContainerTrafficEncryption boolean
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enableManagedSpotTraining boolean
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enableNetworkIsolation boolean
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment {[key: string]: string}
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experimentConfig TrainingJobExperimentConfig
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyperParameters {[key: string]: string}
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infraCheckConfig TrainingJobInfraCheckConfig
    Infrastructure health check configuration. See infraCheckConfig below.
    inputDataConfigs TrainingJobInputDataConfig[]
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflowConfig TrainingJobMlflowConfig
    MLflow integration configuration. See mlflowConfig below.
    modelPackageConfig TrainingJobModelPackageConfig
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    outputDataConfig TrainingJobOutputDataConfig

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profilerConfig TrainingJobProfilerConfig
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profilerRuleConfigurations TrainingJobProfilerRuleConfiguration[]
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remoteDebugConfig TrainingJobRemoteDebugConfig
    Configuration for remote debugging. See remoteDebugConfig below.
    resourceConfig TrainingJobResourceConfig
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retryStrategy TrainingJobRetryStrategy
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    serverlessJobConfig TrainingJobServerlessJobConfig
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    sessionChainingConfig TrainingJobSessionChainingConfig
    Configuration for session tag chaining. See sessionChainingConfig below.
    stoppingCondition TrainingJobStoppingCondition
    Limit on how long the training job can run. See stoppingCondition below.
    tags {[key: string]: string}
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts TrainingJobTimeouts
    vpcConfig TrainingJobVpcConfig
    VPC configuration for the training job. See vpcConfig below.
    role_arn str
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    training_job_name str
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    algorithm_specification TrainingJobAlgorithmSpecificationArgs
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    checkpoint_config TrainingJobCheckpointConfigArgs
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debug_hook_config TrainingJobDebugHookConfigArgs
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debug_rule_configurations Sequence[TrainingJobDebugRuleConfigurationArgs]
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    delete_model_packages_on_destroy bool
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    delete_vpc_enis_on_destroy bool
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enable_inter_container_traffic_encryption bool
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enable_managed_spot_training bool
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enable_network_isolation bool
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment Mapping[str, str]
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experiment_config TrainingJobExperimentConfigArgs
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyper_parameters Mapping[str, str]
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infra_check_config TrainingJobInfraCheckConfigArgs
    Infrastructure health check configuration. See infraCheckConfig below.
    input_data_configs Sequence[TrainingJobInputDataConfigArgs]
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflow_config TrainingJobMlflowConfigArgs
    MLflow integration configuration. See mlflowConfig below.
    model_package_config TrainingJobModelPackageConfigArgs
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    output_data_config TrainingJobOutputDataConfigArgs

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profiler_config TrainingJobProfilerConfigArgs
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profiler_rule_configurations Sequence[TrainingJobProfilerRuleConfigurationArgs]
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region str
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remote_debug_config TrainingJobRemoteDebugConfigArgs
    Configuration for remote debugging. See remoteDebugConfig below.
    resource_config TrainingJobResourceConfigArgs
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retry_strategy TrainingJobRetryStrategyArgs
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    serverless_job_config TrainingJobServerlessJobConfigArgs
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    session_chaining_config TrainingJobSessionChainingConfigArgs
    Configuration for session tag chaining. See sessionChainingConfig below.
    stopping_condition TrainingJobStoppingConditionArgs
    Limit on how long the training job can run. See stoppingCondition below.
    tags Mapping[str, str]
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tensor_board_output_config TrainingJobTensorBoardOutputConfigArgs
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts TrainingJobTimeoutsArgs
    vpc_config TrainingJobVpcConfigArgs
    VPC configuration for the training job. See vpcConfig below.
    roleArn String
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    trainingJobName String
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    algorithmSpecification Property Map
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    checkpointConfig Property Map
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debugHookConfig Property Map
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debugRuleConfigurations List<Property Map>
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    deleteModelPackagesOnDestroy Boolean
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    deleteVpcEnisOnDestroy Boolean
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enableInterContainerTrafficEncryption Boolean
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enableManagedSpotTraining Boolean
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enableNetworkIsolation Boolean
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment Map<String>
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experimentConfig Property Map
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyperParameters Map<String>
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infraCheckConfig Property Map
    Infrastructure health check configuration. See infraCheckConfig below.
    inputDataConfigs List<Property Map>
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflowConfig Property Map
    MLflow integration configuration. See mlflowConfig below.
    modelPackageConfig Property Map
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    outputDataConfig Property Map

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profilerConfig Property Map
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profilerRuleConfigurations List<Property Map>
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remoteDebugConfig Property Map
    Configuration for remote debugging. See remoteDebugConfig below.
    resourceConfig Property Map
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retryStrategy Property Map
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    serverlessJobConfig Property Map
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    sessionChainingConfig Property Map
    Configuration for session tag chaining. See sessionChainingConfig below.
    stoppingCondition Property Map
    Limit on how long the training job can run. See stoppingCondition below.
    tags Map<String>
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tensorBoardOutputConfig Property Map
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts Property Map
    vpcConfig Property Map
    VPC configuration for the training job. See vpcConfig below.

    Outputs

    All input properties are implicitly available as output properties. Additionally, the TrainingJob resource produces the following output properties:

    Arn string
    ARN of the Training Job.
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll Dictionary<string, string>
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    Arn string
    ARN of the Training Job.
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll map[string]string
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    arn String
    ARN of the Training Job.
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String,String>
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    arn string
    ARN of the Training Job.
    id string
    The provider-assigned unique ID for this managed resource.
    tagsAll {[key: string]: string}
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    arn str
    ARN of the Training Job.
    id str
    The provider-assigned unique ID for this managed resource.
    tags_all Mapping[str, str]
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    arn String
    ARN of the Training Job.
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String>
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

    Look up Existing TrainingJob Resource

    Get an existing TrainingJob resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: TrainingJobState, opts?: CustomResourceOptions): TrainingJob
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            algorithm_specification: Optional[TrainingJobAlgorithmSpecificationArgs] = None,
            arn: Optional[str] = None,
            checkpoint_config: Optional[TrainingJobCheckpointConfigArgs] = None,
            debug_hook_config: Optional[TrainingJobDebugHookConfigArgs] = None,
            debug_rule_configurations: Optional[Sequence[TrainingJobDebugRuleConfigurationArgs]] = None,
            delete_model_packages_on_destroy: Optional[bool] = None,
            delete_vpc_enis_on_destroy: Optional[bool] = None,
            enable_inter_container_traffic_encryption: Optional[bool] = None,
            enable_managed_spot_training: Optional[bool] = None,
            enable_network_isolation: Optional[bool] = None,
            environment: Optional[Mapping[str, str]] = None,
            experiment_config: Optional[TrainingJobExperimentConfigArgs] = None,
            hyper_parameters: Optional[Mapping[str, str]] = None,
            infra_check_config: Optional[TrainingJobInfraCheckConfigArgs] = None,
            input_data_configs: Optional[Sequence[TrainingJobInputDataConfigArgs]] = None,
            mlflow_config: Optional[TrainingJobMlflowConfigArgs] = None,
            model_package_config: Optional[TrainingJobModelPackageConfigArgs] = None,
            output_data_config: Optional[TrainingJobOutputDataConfigArgs] = None,
            profiler_config: Optional[TrainingJobProfilerConfigArgs] = None,
            profiler_rule_configurations: Optional[Sequence[TrainingJobProfilerRuleConfigurationArgs]] = None,
            region: Optional[str] = None,
            remote_debug_config: Optional[TrainingJobRemoteDebugConfigArgs] = None,
            resource_config: Optional[TrainingJobResourceConfigArgs] = None,
            retry_strategy: Optional[TrainingJobRetryStrategyArgs] = None,
            role_arn: Optional[str] = None,
            serverless_job_config: Optional[TrainingJobServerlessJobConfigArgs] = None,
            session_chaining_config: Optional[TrainingJobSessionChainingConfigArgs] = None,
            stopping_condition: Optional[TrainingJobStoppingConditionArgs] = None,
            tags: Optional[Mapping[str, str]] = None,
            tags_all: Optional[Mapping[str, str]] = None,
            tensor_board_output_config: Optional[TrainingJobTensorBoardOutputConfigArgs] = None,
            timeouts: Optional[TrainingJobTimeoutsArgs] = None,
            training_job_name: Optional[str] = None,
            vpc_config: Optional[TrainingJobVpcConfigArgs] = None) -> TrainingJob
    func GetTrainingJob(ctx *Context, name string, id IDInput, state *TrainingJobState, opts ...ResourceOption) (*TrainingJob, error)
    public static TrainingJob Get(string name, Input<string> id, TrainingJobState? state, CustomResourceOptions? opts = null)
    public static TrainingJob get(String name, Output<String> id, TrainingJobState state, CustomResourceOptions options)
    resources:
      _:
        type: aws:sagemaker:TrainingJob
        get:
          id: ${id}
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    AlgorithmSpecification TrainingJobAlgorithmSpecification
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    Arn string
    ARN of the Training Job.
    CheckpointConfig TrainingJobCheckpointConfig
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    DebugHookConfig TrainingJobDebugHookConfig
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    DebugRuleConfigurations List<TrainingJobDebugRuleConfiguration>
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    DeleteModelPackagesOnDestroy bool
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    DeleteVpcEnisOnDestroy bool
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    EnableInterContainerTrafficEncryption bool
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    EnableManagedSpotTraining bool
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    EnableNetworkIsolation bool
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    Environment Dictionary<string, string>
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    ExperimentConfig TrainingJobExperimentConfig
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    HyperParameters Dictionary<string, string>
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    InfraCheckConfig TrainingJobInfraCheckConfig
    Infrastructure health check configuration. See infraCheckConfig below.
    InputDataConfigs List<TrainingJobInputDataConfig>
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    MlflowConfig TrainingJobMlflowConfig
    MLflow integration configuration. See mlflowConfig below.
    ModelPackageConfig TrainingJobModelPackageConfig
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    OutputDataConfig TrainingJobOutputDataConfig

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    ProfilerConfig TrainingJobProfilerConfig
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    ProfilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    RemoteDebugConfig TrainingJobRemoteDebugConfig
    Configuration for remote debugging. See remoteDebugConfig below.
    ResourceConfig TrainingJobResourceConfig
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    RetryStrategy TrainingJobRetryStrategy
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    RoleArn string
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    ServerlessJobConfig TrainingJobServerlessJobConfig
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    SessionChainingConfig TrainingJobSessionChainingConfig
    Configuration for session tag chaining. See sessionChainingConfig below.
    StoppingCondition TrainingJobStoppingCondition
    Limit on how long the training job can run. See stoppingCondition below.
    Tags Dictionary<string, string>
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TagsAll Dictionary<string, string>
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    TensorBoardOutputConfig TrainingJobTensorBoardOutputConfig
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    Timeouts TrainingJobTimeouts
    TrainingJobName string
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    VpcConfig TrainingJobVpcConfig
    VPC configuration for the training job. See vpcConfig below.
    AlgorithmSpecification TrainingJobAlgorithmSpecificationArgs
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    Arn string
    ARN of the Training Job.
    CheckpointConfig TrainingJobCheckpointConfigArgs
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    DebugHookConfig TrainingJobDebugHookConfigArgs
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    DebugRuleConfigurations []TrainingJobDebugRuleConfigurationArgs
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    DeleteModelPackagesOnDestroy bool
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    DeleteVpcEnisOnDestroy bool
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    EnableInterContainerTrafficEncryption bool
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    EnableManagedSpotTraining bool
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    EnableNetworkIsolation bool
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    Environment map[string]string
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    ExperimentConfig TrainingJobExperimentConfigArgs
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    HyperParameters map[string]string
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    InfraCheckConfig TrainingJobInfraCheckConfigArgs
    Infrastructure health check configuration. See infraCheckConfig below.
    InputDataConfigs []TrainingJobInputDataConfigArgs
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    MlflowConfig TrainingJobMlflowConfigArgs
    MLflow integration configuration. See mlflowConfig below.
    ModelPackageConfig TrainingJobModelPackageConfigArgs
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    OutputDataConfig TrainingJobOutputDataConfigArgs

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    ProfilerConfig TrainingJobProfilerConfigArgs
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    ProfilerRuleConfigurations []TrainingJobProfilerRuleConfigurationArgs
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    RemoteDebugConfig TrainingJobRemoteDebugConfigArgs
    Configuration for remote debugging. See remoteDebugConfig below.
    ResourceConfig TrainingJobResourceConfigArgs
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    RetryStrategy TrainingJobRetryStrategyArgs
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    RoleArn string
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    ServerlessJobConfig TrainingJobServerlessJobConfigArgs
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    SessionChainingConfig TrainingJobSessionChainingConfigArgs
    Configuration for session tag chaining. See sessionChainingConfig below.
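    StoppingCondition TrainingJobStoppingConditionArgs
    Conditions under which the training job is stopped, such as its maximum runtime (maxRuntimeInSeconds). See stoppingCondition below.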
    Tags map[string]string
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TagsAll map[string]string
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    TensorBoardOutputConfig TrainingJobTensorBoardOutputConfigArgs
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    Timeouts TrainingJobTimeoutsArgs
    TrainingJobName string
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    VpcConfig TrainingJobVpcConfigArgs
    VPC configuration for the training job. See vpcConfig below.
    algorithmSpecification TrainingJobAlgorithmSpecification
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    arn String
    ARN of the Training Job.
    checkpointConfig TrainingJobCheckpointConfig
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debugHookConfig TrainingJobDebugHookConfig
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debugRuleConfigurations List<TrainingJobDebugRuleConfiguration>
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    deleteModelPackagesOnDestroy Boolean
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    deleteVpcEnisOnDestroy Boolean
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enableInterContainerTrafficEncryption Boolean
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enableManagedSpotTraining Boolean
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enableNetworkIsolation Boolean
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment Map<String,String>
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experimentConfig TrainingJobExperimentConfig
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyperParameters Map<String,String>
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infraCheckConfig TrainingJobInfraCheckConfig
    Infrastructure health check configuration. See infraCheckConfig below.
    inputDataConfigs List<TrainingJobInputDataConfig>
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflowConfig TrainingJobMlflowConfig
    MLflow integration configuration. See mlflowConfig below.
    modelPackageConfig TrainingJobModelPackageConfig
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    outputDataConfig TrainingJobOutputDataConfig

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profilerConfig TrainingJobProfilerConfig
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remoteDebugConfig TrainingJobRemoteDebugConfig
    Configuration for remote debugging. See remoteDebugConfig below.
    resourceConfig TrainingJobResourceConfig
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retryStrategy TrainingJobRetryStrategy
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    roleArn String
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    serverlessJobConfig TrainingJobServerlessJobConfig
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    sessionChainingConfig TrainingJobSessionChainingConfig
    Configuration for session tag chaining. See sessionChainingConfig below.
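    stoppingCondition TrainingJobStoppingCondition
    Conditions under which the training job is stopped, such as its maximum runtime (maxRuntimeInSeconds). See stoppingCondition below.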
    tags Map<String,String>
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll Map<String,String>
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts TrainingJobTimeouts
    trainingJobName String
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    vpcConfig TrainingJobVpcConfig
    VPC configuration for the training job. See vpcConfig below.
    algorithmSpecification TrainingJobAlgorithmSpecification
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    arn string
    ARN of the Training Job.
    checkpointConfig TrainingJobCheckpointConfig
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debugHookConfig TrainingJobDebugHookConfig
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debugRuleConfigurations TrainingJobDebugRuleConfiguration[]
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    deleteModelPackagesOnDestroy boolean
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    deleteVpcEnisOnDestroy boolean
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enableInterContainerTrafficEncryption boolean
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enableManagedSpotTraining boolean
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enableNetworkIsolation boolean
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment {[key: string]: string}
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experimentConfig TrainingJobExperimentConfig
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyperParameters {[key: string]: string}
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infraCheckConfig TrainingJobInfraCheckConfig
    Infrastructure health check configuration. See infraCheckConfig below.
    inputDataConfigs TrainingJobInputDataConfig[]
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflowConfig TrainingJobMlflowConfig
    MLflow integration configuration. See mlflowConfig below.
    modelPackageConfig TrainingJobModelPackageConfig
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    outputDataConfig TrainingJobOutputDataConfig

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profilerConfig TrainingJobProfilerConfig
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profilerRuleConfigurations TrainingJobProfilerRuleConfiguration[]
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remoteDebugConfig TrainingJobRemoteDebugConfig
    Configuration for remote debugging. See remoteDebugConfig below.
    resourceConfig TrainingJobResourceConfig
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retryStrategy TrainingJobRetryStrategy
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    roleArn string
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    serverlessJobConfig TrainingJobServerlessJobConfig
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    sessionChainingConfig TrainingJobSessionChainingConfig
    Configuration for session tag chaining. See sessionChainingConfig below.
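    stoppingCondition TrainingJobStoppingCondition
    Conditions under which the training job is stopped, such as its maximum runtime (maxRuntimeInSeconds). See stoppingCondition below.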
    tags {[key: string]: string}
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll {[key: string]: string}
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts TrainingJobTimeouts
    trainingJobName string
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    vpcConfig TrainingJobVpcConfig
    VPC configuration for the training job. See vpcConfig below.
    algorithm_specification TrainingJobAlgorithmSpecificationArgs
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    arn str
    ARN of the Training Job.
    checkpoint_config TrainingJobCheckpointConfigArgs
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debug_hook_config TrainingJobDebugHookConfigArgs
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debug_rule_configurations Sequence[TrainingJobDebugRuleConfigurationArgs]
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    delete_model_packages_on_destroy bool
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    delete_vpc_enis_on_destroy bool
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enable_inter_container_traffic_encryption bool
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enable_managed_spot_training bool
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enable_network_isolation bool
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment Mapping[str, str]
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experiment_config TrainingJobExperimentConfigArgs
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyper_parameters Mapping[str, str]
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infra_check_config TrainingJobInfraCheckConfigArgs
    Infrastructure health check configuration. See infraCheckConfig below.
    input_data_configs Sequence[TrainingJobInputDataConfigArgs]
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflow_config TrainingJobMlflowConfigArgs
    MLflow integration configuration. See mlflowConfig below.
    model_package_config TrainingJobModelPackageConfigArgs
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    output_data_config TrainingJobOutputDataConfigArgs

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profiler_config TrainingJobProfilerConfigArgs
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profiler_rule_configurations Sequence[TrainingJobProfilerRuleConfigurationArgs]
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region str
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remote_debug_config TrainingJobRemoteDebugConfigArgs
    Configuration for remote debugging. See remoteDebugConfig below.
    resource_config TrainingJobResourceConfigArgs
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retry_strategy TrainingJobRetryStrategyArgs
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    role_arn str
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    serverless_job_config TrainingJobServerlessJobConfigArgs
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    session_chaining_config TrainingJobSessionChainingConfigArgs
    Configuration for session tag chaining. See sessionChainingConfig below.
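    stopping_condition TrainingJobStoppingConditionArgs
    Conditions under which the training job is stopped, such as its maximum runtime (maxRuntimeInSeconds). See stoppingCondition below.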
    tags Mapping[str, str]
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tags_all Mapping[str, str]
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    tensor_board_output_config TrainingJobTensorBoardOutputConfigArgs
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts TrainingJobTimeoutsArgs
    training_job_name str
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    vpc_config TrainingJobVpcConfigArgs
    VPC configuration for the training job. See vpcConfig below.
    algorithmSpecification Property Map
    Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.
    arn String
    ARN of the Training Job.
    checkpointConfig Property Map
    Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.
    debugHookConfig Property Map
    Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.
    debugRuleConfigurations List<Property Map>
    List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.
    deleteModelPackagesOnDestroy Boolean
    Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.
    deleteVpcEnisOnDestroy Boolean
    Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.
    enableInterContainerTrafficEncryption Boolean
    Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.
    enableManagedSpotTraining Boolean
    Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.
    enableNetworkIsolation Boolean
    Whether to isolate the training container from the network. No inbound or outbound network calls can be made.
    environment Map<String>
    Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.
    experimentConfig Property Map
    Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.
    hyperParameters Map<String>
    Map of hyperparameters for the training algorithm. Maximum of 100 entries.
    infraCheckConfig Property Map
    Infrastructure health check configuration. See infraCheckConfig below.
    inputDataConfigs List<Property Map>
    List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.
    mlflowConfig Property Map
    MLflow integration configuration. See mlflowConfig below.
    modelPackageConfig Property Map
    Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.
    outputDataConfig Property Map

    Location of the output data from the training job. See outputDataConfig below.

    The following arguments are optional:

    profilerConfig Property Map
    Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.
    profilerRuleConfigurations List<Property Map>
    List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    remoteDebugConfig Property Map
    Configuration for remote debugging. See remoteDebugConfig below.
    resourceConfig Property Map
    Resources for the training job, including compute instances and storage volumes. See resourceConfig below.
    retryStrategy Property Map
    Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.
    roleArn String
    ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.
    serverlessJobConfig Property Map
    Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.
    sessionChainingConfig Property Map
    Configuration for session tag chaining. See sessionChainingConfig below.
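    stoppingCondition Property Map
    Conditions under which the training job is stopped, such as its maximum runtime (maxRuntimeInSeconds). See stoppingCondition below.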
    tags Map<String>
    Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll Map<String>
    Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.
    tensorBoardOutputConfig Property Map
    Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.
    timeouts Property Map
    trainingJobName String
    Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.
    vpcConfig Property Map
    VPC configuration for the training job. See vpcConfig below.

    Supporting Types

    TrainingJobAlgorithmSpecification, TrainingJobAlgorithmSpecificationArgs

    AlgorithmName string
    Name or ARN of the algorithm resource to use for the training job.
    ContainerArguments List<string>
    List of arguments for the container entrypoint. Maximum of 100 entries.
    ContainerEntrypoints List<string>
    List of entrypoint commands for the container. Maximum of 100 entries.
    EnableSagemakerMetricsTimeSeries bool
    Whether to enable SageMaker AI metrics time series collection.
    MetricDefinitions List<TrainingJobAlgorithmSpecificationMetricDefinition>
    List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
    TrainingImage string
    Registry path of the Docker image that contains the training algorithm.
    TrainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig
    Training image configuration. See trainingImageConfig below.
    TrainingInputMode string
    Input mode for the training data. Valid values: File, Pipe, FastFile.
    AlgorithmName string
    Name or ARN of the algorithm resource to use for the training job.
    ContainerArguments []string
    List of arguments for the container entrypoint. Maximum of 100 entries.
    ContainerEntrypoints []string
    List of entrypoint commands for the container. Maximum of 100 entries.
    EnableSagemakerMetricsTimeSeries bool
    Whether to enable SageMaker AI metrics time series collection.
    MetricDefinitions []TrainingJobAlgorithmSpecificationMetricDefinition
    List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
    TrainingImage string
    Registry path of the Docker image that contains the training algorithm.
    TrainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig
    Training image configuration. See trainingImageConfig below.
    TrainingInputMode string
    Input mode for the training data. Valid values: File, Pipe, FastFile.
    algorithmName String
    Name or ARN of the algorithm resource to use for the training job.
    containerArguments List<String>
    List of arguments for the container entrypoint. Maximum of 100 entries.
    containerEntrypoints List<String>
    List of entrypoint commands for the container. Maximum of 100 entries.
    enableSagemakerMetricsTimeSeries Boolean
    Whether to enable SageMaker AI metrics time series collection.
    metricDefinitions List<TrainingJobAlgorithmSpecificationMetricDefinition>
    List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
    trainingImage String
    Registry path of the Docker image that contains the training algorithm.
    trainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig
    Training image configuration. See trainingImageConfig below.
    trainingInputMode String
    Input mode for the training data. Valid values: File, Pipe, FastFile.
    algorithmName string
    Name or ARN of the algorithm resource to use for the training job.
    containerArguments string[]
    List of arguments for the container entrypoint. Maximum of 100 entries.
    containerEntrypoints string[]
    List of entrypoint commands for the container. Maximum of 100 entries.
    enableSagemakerMetricsTimeSeries boolean
    Whether to enable SageMaker AI metrics time series collection.
    metricDefinitions TrainingJobAlgorithmSpecificationMetricDefinition[]
    List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
    trainingImage string
    Registry path of the Docker image that contains the training algorithm.
    trainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig
    Training image configuration. See trainingImageConfig below.
    trainingInputMode string
    Input mode for the training data. Valid values: File, Pipe, FastFile.
    algorithm_name str
    Name or ARN of the algorithm resource to use for the training job.
    container_arguments Sequence[str]
    List of arguments for the container entrypoint. Maximum of 100 entries.
    container_entrypoints Sequence[str]
    List of entrypoint commands for the container. Maximum of 100 entries.
    enable_sagemaker_metrics_time_series bool
    Whether to enable SageMaker AI metrics time series collection.
    metric_definitions Sequence[TrainingJobAlgorithmSpecificationMetricDefinition]
    List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
    training_image str
    Registry path of the Docker image that contains the training algorithm.
    training_image_config TrainingJobAlgorithmSpecificationTrainingImageConfig
    Training image configuration. See trainingImageConfig below.
    training_input_mode str
    Input mode for the training data. Valid values: File, Pipe, FastFile.
    algorithmName String
    Name or ARN of the algorithm resource to use for the training job.
    containerArguments List<String>
    List of arguments for the container entrypoint. Maximum of 100 entries.
    containerEntrypoints List<String>
    List of entrypoint commands for the container. Maximum of 100 entries.
    enableSagemakerMetricsTimeSeries Boolean
    Whether to enable SageMaker AI metrics time series collection.
    metricDefinitions List<Property Map>
    List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
    trainingImage String
    Registry path of the Docker image that contains the training algorithm.
    trainingImageConfig Property Map
    Training image configuration. See trainingImageConfig below.
    trainingInputMode String
    Input mode for the training data. Valid values: File, Pipe, FastFile.

    TrainingJobAlgorithmSpecificationMetricDefinition, TrainingJobAlgorithmSpecificationMetricDefinitionArgs

    Name string
    Name of the metric.
    Regex string
    Regular expression that searches the output of the training job and captures the value of the metric.
    Name string
    Name of the metric.
    Regex string
    Regular expression that searches the output of the training job and captures the value of the metric.
    name String
    Name of the metric.
    regex String
    Regular expression that searches the output of the training job and captures the value of the metric.
    name string
    Name of the metric.
    regex string
    Regular expression that searches the output of the training job and captures the value of the metric.
    name str
    Name of the metric.
    regex str
    Regular expression that searches the output of the training job and captures the value of the metric.
    name String
    Name of the metric.
    regex String
    Regular expression that searches the output of the training job and captures the value of the metric.

    TrainingJobAlgorithmSpecificationTrainingImageConfig, TrainingJobAlgorithmSpecificationTrainingImageConfigArgs

    TrainingRepositoryAccessMode string
    Access mode for the training image repository.
    TrainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig
    Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.
    TrainingRepositoryAccessMode string
    Access mode for the training image repository.
    TrainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig
    Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.
    trainingRepositoryAccessMode String
    Access mode for the training image repository.
    trainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig
    Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.
    trainingRepositoryAccessMode string
    Access mode for the training image repository.
    trainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig
    Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.
    training_repository_access_mode str
    Access mode for the training image repository.
    training_repository_auth_config TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig
    Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.
    trainingRepositoryAccessMode String
    Access mode for the training image repository.
    trainingRepositoryAuthConfig Property Map
    Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.

    TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig, TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs

    TrainingRepositoryCredentialsProviderArn string
    ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.
    TrainingRepositoryCredentialsProviderArn string
    ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.
    trainingRepositoryCredentialsProviderArn String
    ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.
    trainingRepositoryCredentialsProviderArn string
    ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.
    training_repository_credentials_provider_arn str
    ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.
    trainingRepositoryCredentialsProviderArn String
    ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.

    TrainingJobCheckpointConfig, TrainingJobCheckpointConfigArgs

    S3Uri string
    S3 URI where checkpoints are stored.
    LocalPath string
    Local path where checkpoints are written.
    S3Uri string
    S3 URI where checkpoints are stored.
    LocalPath string
    Local path where checkpoints are written.
    s3Uri String
    S3 URI where checkpoints are stored.
    localPath String
    Local path where checkpoints are written.
    s3Uri string
    S3 URI where checkpoints are stored.
    localPath string
    Local path where checkpoints are written.
    s3_uri str
    S3 URI where checkpoints are stored.
    local_path str
    Local path where checkpoints are written.
    s3Uri String
    S3 URI where checkpoints are stored.
    localPath String
    Local path where checkpoints are written.

    TrainingJobDebugHookConfig, TrainingJobDebugHookConfigArgs

    S3OutputPath string
    S3 URI where debug output is stored.
    CollectionConfigurations List<TrainingJobDebugHookConfigCollectionConfiguration>
    List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
    HookParameters Dictionary<string, string>
    Map of parameters for the debug hook. Maximum of 20 entries.
    LocalPath string
    Local path where debug output is written.
    S3OutputPath string
    S3 URI where debug output is stored.
    CollectionConfigurations []TrainingJobDebugHookConfigCollectionConfiguration
    List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
    HookParameters map[string]string
    Map of parameters for the debug hook. Maximum of 20 entries.
    LocalPath string
    Local path where debug output is written.
    s3OutputPath String
    S3 URI where debug output is stored.
    collectionConfigurations List<TrainingJobDebugHookConfigCollectionConfiguration>
    List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
    hookParameters Map<String,String>
    Map of parameters for the debug hook. Maximum of 20 entries.
    localPath String
    Local path where debug output is written.
    s3OutputPath string
    S3 URI where debug output is stored.
    collectionConfigurations TrainingJobDebugHookConfigCollectionConfiguration[]
    List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
    hookParameters {[key: string]: string}
    Map of parameters for the debug hook. Maximum of 20 entries.
    localPath string
    Local path where debug output is written.
    s3_output_path str
    S3 URI where debug output is stored.
    collection_configurations Sequence[TrainingJobDebugHookConfigCollectionConfiguration]
    List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
    hook_parameters Mapping[str, str]
    Map of parameters for the debug hook. Maximum of 20 entries.
    local_path str
    Local path where debug output is written.
    s3OutputPath String
    S3 URI where debug output is stored.
    collectionConfigurations List<Property Map>
    List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
    hookParameters Map<String>
    Map of parameters for the debug hook. Maximum of 20 entries.
    localPath String
    Local path where debug output is written.

    TrainingJobDebugHookConfigCollectionConfiguration, TrainingJobDebugHookConfigCollectionConfigurationArgs

    CollectionName string
    Name of the tensor collection.
    CollectionParameters Dictionary<string, string>
    Map of parameters for the tensor collection.
    CollectionName string
    Name of the tensor collection.
    CollectionParameters map[string]string
    Map of parameters for the tensor collection.
    collectionName String
    Name of the tensor collection.
    collectionParameters Map<String,String>
    Map of parameters for the tensor collection.
    collectionName string
    Name of the tensor collection.
    collectionParameters {[key: string]: string}
    Map of parameters for the tensor collection.
    collection_name str
    Name of the tensor collection.
    collection_parameters Mapping[str, str]
    Map of parameters for the tensor collection.
    collectionName String
    Name of the tensor collection.
    collectionParameters Map<String>
    Map of parameters for the tensor collection.

    TrainingJobDebugRuleConfiguration, TrainingJobDebugRuleConfigurationArgs

    RuleConfigurationName string
    Name of the rule configuration. Must be between 1 and 256 characters.
    RuleEvaluatorImage string
    Docker image URI for the rule evaluator.
    InstanceType string
    Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
    LocalPath string
    Local path where debug rule output is written.
    RuleParameters Dictionary<string, string>
    Map of parameters for the rule configuration. Maximum of 100 entries.
    S3OutputPath string
    S3 URI where rule output is stored.
    VolumeSizeInGb int
    Size of the storage volume for the rule evaluator, in GB.
    RuleConfigurationName string
    Name of the rule configuration. Must be between 1 and 256 characters.
    RuleEvaluatorImage string
    Docker image URI for the rule evaluator.
    InstanceType string
    Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
    LocalPath string
    Local path where debug rule output is written.
    RuleParameters map[string]string
    Map of parameters for the rule configuration. Maximum of 100 entries.
    S3OutputPath string
    S3 URI where rule output is stored.
    VolumeSizeInGb int
    Size of the storage volume for the rule evaluator, in GB.
    ruleConfigurationName String
    Name of the rule configuration. Must be between 1 and 256 characters.
    ruleEvaluatorImage String
    Docker image URI for the rule evaluator.
    instanceType String
    Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
    localPath String
    Local path where debug rule output is written.
    ruleParameters Map<String,String>
    Map of parameters for the rule configuration. Maximum of 100 entries.
    s3OutputPath String
    S3 URI where rule output is stored.
    volumeSizeInGb Integer
    Size of the storage volume for the rule evaluator, in GB.
    ruleConfigurationName string
    Name of the rule configuration. Must be between 1 and 256 characters.
    ruleEvaluatorImage string
    Docker image URI for the rule evaluator.
    instanceType string
    Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
    localPath string
    Local path where debug rule output is written.
    ruleParameters {[key: string]: string}
    Map of parameters for the rule configuration. Maximum of 100 entries.
    s3OutputPath string
    S3 URI where rule output is stored.
    volumeSizeInGb number
    Size of the storage volume for the rule evaluator, in GB.
    rule_configuration_name str
    Name of the rule configuration. Must be between 1 and 256 characters.
    rule_evaluator_image str
    Docker image URI for the rule evaluator.
    instance_type str
    Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
    local_path str
    Local path where debug rule output is written.
    rule_parameters Mapping[str, str]
    Map of parameters for the rule configuration. Maximum of 100 entries.
    s3_output_path str
    S3 URI where rule output is stored.
    volume_size_in_gb int
    Size of the storage volume for the rule evaluator, in GB.
    ruleConfigurationName String
    Name of the rule configuration. Must be between 1 and 256 characters.
    ruleEvaluatorImage String
    Docker image URI for the rule evaluator.
    instanceType String
    Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
    localPath String
    Local path where debug rule output is written.
    ruleParameters Map<String>
    Map of parameters for the rule configuration. Maximum of 100 entries.
    s3OutputPath String
    S3 URI where rule output is stored.
    volumeSizeInGb Number
    Size of the storage volume for the rule evaluator, in GB.

    TrainingJobExperimentConfig, TrainingJobExperimentConfigArgs

    ExperimentName string
    Name of the SageMaker AI Experiment to associate with the training job.
    RunName string
    Name of the Experiment Run to associate with the training job.
    TrialComponentDisplayName string
    Display name for the trial component created for the training job.
    TrialName string
    Name of the SageMaker AI Trial to associate with the training job.
    ExperimentName string
    Name of the SageMaker AI Experiment to associate with the training job.
    RunName string
    Name of the Experiment Run to associate with the training job.
    TrialComponentDisplayName string
    Display name for the trial component created for the training job.
    TrialName string
    Name of the SageMaker AI Trial to associate with the training job.
    experimentName String
    Name of the SageMaker AI Experiment to associate with the training job.
    runName String
    Name of the Experiment Run to associate with the training job.
    trialComponentDisplayName String
    Display name for the trial component created for the training job.
    trialName String
    Name of the SageMaker AI Trial to associate with the training job.
    experimentName string
    Name of the SageMaker AI Experiment to associate with the training job.
    runName string
    Name of the Experiment Run to associate with the training job.
    trialComponentDisplayName string
    Display name for the trial component created for the training job.
    trialName string
    Name of the SageMaker AI Trial to associate with the training job.
    experiment_name str
    Name of the SageMaker AI Experiment to associate with the training job.
    run_name str
    Name of the Experiment Run to associate with the training job.
    trial_component_display_name str
    Display name for the trial component created for the training job.
    trial_name str
    Name of the SageMaker AI Trial to associate with the training job.
    experimentName String
    Name of the SageMaker AI Experiment to associate with the training job.
    runName String
    Name of the Experiment Run to associate with the training job.
    trialComponentDisplayName String
    Display name for the trial component created for the training job.
    trialName String
    Name of the SageMaker AI Trial to associate with the training job.

    TrainingJobInfraCheckConfig, TrainingJobInfraCheckConfigArgs

    EnableInfraCheck bool
    Whether to enable infrastructure health checks before training.
    EnableInfraCheck bool
    Whether to enable infrastructure health checks before training.
    enableInfraCheck Boolean
    Whether to enable infrastructure health checks before training.
    enableInfraCheck boolean
    Whether to enable infrastructure health checks before training.
    enable_infra_check bool
    Whether to enable infrastructure health checks before training.
    enableInfraCheck Boolean
    Whether to enable infrastructure health checks before training.

    TrainingJobInputDataConfig, TrainingJobInputDataConfigArgs

    ChannelName string
    Name of the channel. Must be between 1 and 64 characters.
    CompressionType string
    Compression type for the input data. Valid values: None, Gzip.
    ContentType string
    MIME type of the input data.
    DataSource TrainingJobInputDataConfigDataSource
    Location of the channel data. See dataSource below.
    InputMode string
    Input mode for the channel data. Valid values: File, Pipe, FastFile.
    RecordWrapperType string
    Record wrapper type. Valid values: None, RecordIO.
    ShuffleConfig TrainingJobInputDataConfigShuffleConfig
    Configuration for shuffling data in the channel. See shuffleConfig below.
    ChannelName string
    Name of the channel. Must be between 1 and 64 characters.
    CompressionType string
    Compression type for the input data. Valid values: None, Gzip.
    ContentType string
    MIME type of the input data.
    DataSource TrainingJobInputDataConfigDataSource
    Location of the channel data. See dataSource below.
    InputMode string
    Input mode for the channel data. Valid values: File, Pipe, FastFile.
    RecordWrapperType string
    Record wrapper type. Valid values: None, RecordIO.
    ShuffleConfig TrainingJobInputDataConfigShuffleConfig
    Configuration for shuffling data in the channel. See shuffleConfig below.
    channelName String
    Name of the channel. Must be between 1 and 64 characters.
    compressionType String
    Compression type for the input data. Valid values: None, Gzip.
    contentType String
    MIME type of the input data.
    dataSource TrainingJobInputDataConfigDataSource
    Location of the channel data. See dataSource below.
    inputMode String
    Input mode for the channel data. Valid values: File, Pipe, FastFile.
    recordWrapperType String
    Record wrapper type. Valid values: None, RecordIO.
    shuffleConfig TrainingJobInputDataConfigShuffleConfig
    Configuration for shuffling data in the channel. See shuffleConfig below.
    channelName string
    Name of the channel. Must be between 1 and 64 characters.
    compressionType string
    Compression type for the input data. Valid values: None, Gzip.
    contentType string
    MIME type of the input data.
    dataSource TrainingJobInputDataConfigDataSource
    Location of the channel data. See dataSource below.
    inputMode string
    Input mode for the channel data. Valid values: File, Pipe, FastFile.
    recordWrapperType string
    Record wrapper type. Valid values: None, RecordIO.
    shuffleConfig TrainingJobInputDataConfigShuffleConfig
    Configuration for shuffling data in the channel. See shuffleConfig below.
    channel_name str
    Name of the channel. Must be between 1 and 64 characters.
    compression_type str
    Compression type for the input data. Valid values: None, Gzip.
    content_type str
    MIME type of the input data.
    data_source TrainingJobInputDataConfigDataSource
    Location of the channel data. See dataSource below.
    input_mode str
    Input mode for the channel data. Valid values: File, Pipe, FastFile.
    record_wrapper_type str
    Record wrapper type. Valid values: None, RecordIO.
    shuffle_config TrainingJobInputDataConfigShuffleConfig
    Configuration for shuffling data in the channel. See shuffleConfig below.
    channelName String
    Name of the channel. Must be between 1 and 64 characters.
    compressionType String
    Compression type for the input data. Valid values: None, Gzip.
    contentType String
    MIME type of the input data.
    dataSource Property Map
    Location of the channel data. See dataSource below.
    inputMode String
    Input mode for the channel data. Valid values: File, Pipe, FastFile.
    recordWrapperType String
    Record wrapper type. Valid values: None, RecordIO.
    shuffleConfig Property Map
    Configuration for shuffling data in the channel. See shuffleConfig below.

    TrainingJobInputDataConfigDataSource, TrainingJobInputDataConfigDataSourceArgs

    FileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource
    File system data source. See fileSystemDataSource below.
    S3DataSource TrainingJobInputDataConfigDataSourceS3DataSource
    S3 data source. See s3DataSource below.
    FileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource
    File system data source. See fileSystemDataSource below.
    S3DataSource TrainingJobInputDataConfigDataSourceS3DataSource
    S3 data source. See s3DataSource below.
    fileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource
    File system data source. See fileSystemDataSource below.
    s3DataSource TrainingJobInputDataConfigDataSourceS3DataSource
    S3 data source. See s3DataSource below.
    fileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource
    File system data source. See fileSystemDataSource below.
    s3DataSource TrainingJobInputDataConfigDataSourceS3DataSource
    S3 data source. See s3DataSource below.
    file_system_data_source TrainingJobInputDataConfigDataSourceFileSystemDataSource
    File system data source. See fileSystemDataSource below.
    s3_data_source TrainingJobInputDataConfigDataSourceS3DataSource
    S3 data source. See s3DataSource below.
    fileSystemDataSource Property Map
    File system data source. See fileSystemDataSource below.
    s3DataSource Property Map
    S3 data source. See s3DataSource below.

    TrainingJobInputDataConfigDataSourceFileSystemDataSource, TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs

    DirectoryPath string
    Full path to the directory on the file system.
    FileSystemAccessMode string
    Access mode for the file system. Valid values: ro, rw.
    FileSystemId string
    File system ID.
    FileSystemType string
    File system type. Valid values: EFS, FSxLustre.
    DirectoryPath string
    Full path to the directory on the file system.
    FileSystemAccessMode string
    Access mode for the file system. Valid values: ro, rw.
    FileSystemId string
    File system ID.
    FileSystemType string
    File system type. Valid values: EFS, FSxLustre.
    directoryPath String
    Full path to the directory on the file system.
    fileSystemAccessMode String
    Access mode for the file system. Valid values: ro, rw.
    fileSystemId String
    File system ID.
    fileSystemType String
    File system type. Valid values: EFS, FSxLustre.
    directoryPath string
    Full path to the directory on the file system.
    fileSystemAccessMode string
    Access mode for the file system. Valid values: ro, rw.
    fileSystemId string
    File system ID.
    fileSystemType string
    File system type. Valid values: EFS, FSxLustre.
    directory_path str
    Full path to the directory on the file system.
    file_system_access_mode str
    Access mode for the file system. Valid values: ro, rw.
    file_system_id str
    File system ID.
    file_system_type str
    File system type. Valid values: EFS, FSxLustre.
    directoryPath String
    Full path to the directory on the file system.
    fileSystemAccessMode String
    Access mode for the file system. Valid values: ro, rw.
    fileSystemId String
    File system ID.
    fileSystemType String
    File system type. Valid values: EFS, FSxLustre.

    TrainingJobInputDataConfigDataSourceS3DataSource, TrainingJobInputDataConfigDataSourceS3DataSourceArgs

    S3DataType string
    S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
    S3Uri string
    S3 URI of the data.
    AttributeNames List<string>
    List of attribute names to include in the training dataset. Maximum of 16.
    HubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig
    SageMaker AI Hub access configuration. See hubAccessConfig below.
    InstanceGroupNames List<string>
    List of instance group names for the training data distribution. Maximum of 5.
    ModelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig
    Model access configuration. See modelAccessConfig below.
    S3DataDistributionType string
    Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.
    S3DataType string
    S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
    S3Uri string
    S3 URI of the data.
    AttributeNames []string
    List of attribute names to include in the training dataset. Maximum of 16.
    HubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig
    SageMaker AI Hub access configuration. See hubAccessConfig below.
    InstanceGroupNames []string
    List of instance group names for the training data distribution. Maximum of 5.
    ModelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig
    Model access configuration. See modelAccessConfig below.
    S3DataDistributionType string
    Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.
    s3DataType String
    S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
    s3Uri String
    S3 URI of the data.
    attributeNames List<String>
    List of attribute names to include in the training dataset. Maximum of 16.
    hubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig
    SageMaker AI Hub access configuration. See hubAccessConfig below.
    instanceGroupNames List<String>
    List of instance group names for the training data distribution. Maximum of 5.
    modelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig
    Model access configuration. See modelAccessConfig below.
    s3DataDistributionType String
    Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.
    s3DataType string
    S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
    s3Uri string
    S3 URI of the data.
    attributeNames string[]
    List of attribute names to include in the training dataset. Maximum of 16.
    hubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig
    SageMaker AI Hub access configuration. See hubAccessConfig below.
    instanceGroupNames string[]
    List of instance group names for the training data distribution. Maximum of 5.
    modelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig
    Model access configuration. See modelAccessConfig below.
    s3DataDistributionType string
    Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.
    s3_data_type str
    S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
    s3_uri str
    S3 URI of the data.
    attribute_names Sequence[str]
    List of attribute names to include in the training dataset. Maximum of 16.
    hub_access_config TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig
    SageMaker AI Hub access configuration. See hubAccessConfig below.
    instance_group_names Sequence[str]
    List of instance group names for the training data distribution. Maximum of 5.
    model_access_config TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig
    Model access configuration. See modelAccessConfig below.
    s3_data_distribution_type str
    Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.
    s3DataType String
    S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
    s3Uri String
    S3 URI of the data.
    attributeNames List<String>
    List of attribute names to include in the training dataset. Maximum of 16.
    hubAccessConfig Property Map
    SageMaker AI Hub access configuration. See hubAccessConfig below.
    instanceGroupNames List<String>
    List of instance group names for the training data distribution. Maximum of 5.
    modelAccessConfig Property Map
    Model access configuration. See modelAccessConfig below.
    s3DataDistributionType String
    Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.

    TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig, TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs

    HubContentArn string
    ARN of the hub content.
    HubContentArn string
    ARN of the hub content.
    hubContentArn String
    ARN of the hub content.
    hubContentArn string
    ARN of the hub content.
    hub_content_arn str
    ARN of the hub content.
    hubContentArn String
    ARN of the hub content.

    TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig, TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs

    AcceptEula bool
    Whether to accept the model EULA.
    AcceptEula bool
    Whether to accept the model EULA.
    acceptEula Boolean
    Whether to accept the model EULA.
    acceptEula boolean
    Whether to accept the model EULA.
    accept_eula bool
    Whether to accept the model EULA.
    acceptEula Boolean
    Whether to accept the model EULA.

    TrainingJobInputDataConfigShuffleConfig, TrainingJobInputDataConfigShuffleConfigArgs

    Seed int
    Seed value used to shuffle the training data.
    Seed int
    Seed value used to shuffle the training data.
    seed Integer
    Seed value used to shuffle the training data.
    seed number
    Seed value used to shuffle the training data.
    seed int
    Seed value used to shuffle the training data.
    seed Number
    Seed value used to shuffle the training data.

    TrainingJobMlflowConfig, TrainingJobMlflowConfigArgs

    MlflowResourceArn string
    ARN of the MLflow tracking server.
    MlflowExperimentName string
    Name of the MLflow experiment.
    MlflowRunName string
    Name of the MLflow run.
    MlflowResourceArn string
    ARN of the MLflow tracking server.
    MlflowExperimentName string
    Name of the MLflow experiment.
    MlflowRunName string
    Name of the MLflow run.
    mlflowResourceArn String
    ARN of the MLflow tracking server.
    mlflowExperimentName String
    Name of the MLflow experiment.
    mlflowRunName String
    Name of the MLflow run.
    mlflowResourceArn string
    ARN of the MLflow tracking server.
    mlflowExperimentName string
    Name of the MLflow experiment.
    mlflowRunName string
    Name of the MLflow run.
    mlflow_resource_arn str
    ARN of the MLflow tracking server.
    mlflow_experiment_name str
    Name of the MLflow experiment.
    mlflow_run_name str
    Name of the MLflow run.
    mlflowResourceArn String
    ARN of the MLflow tracking server.
    mlflowExperimentName String
    Name of the MLflow experiment.
    mlflowRunName String
    Name of the MLflow run.

    TrainingJobModelPackageConfig, TrainingJobModelPackageConfigArgs

    ModelPackageGroupArn string
    ARN of the model package group.
    SourceModelPackageArn string
    ARN of the source model package.
    ModelPackageGroupArn string
    ARN of the model package group.
    SourceModelPackageArn string
    ARN of the source model package.
    modelPackageGroupArn String
    ARN of the model package group.
    sourceModelPackageArn String
    ARN of the source model package.
    modelPackageGroupArn string
    ARN of the model package group.
    sourceModelPackageArn string
    ARN of the source model package.
    model_package_group_arn str
    ARN of the model package group.
    source_model_package_arn str
    ARN of the source model package.
    modelPackageGroupArn String
    ARN of the model package group.
    sourceModelPackageArn String
    ARN of the source model package.

    TrainingJobOutputDataConfig, TrainingJobOutputDataConfigArgs

    S3OutputPath string
    S3 URI where output data is stored.
    CompressionType string
    Output compression type. Valid values: GZIP, NONE.
    KmsKeyId string
    KMS key ID used to encrypt the output data.
    S3OutputPath string
    S3 URI where output data is stored.
    CompressionType string
    Output compression type. Valid values: GZIP, NONE.
    KmsKeyId string
    KMS key ID used to encrypt the output data.
    s3OutputPath String
    S3 URI where output data is stored.
    compressionType String
    Output compression type. Valid values: GZIP, NONE.
    kmsKeyId String
    KMS key ID used to encrypt the output data.
    s3OutputPath string
    S3 URI where output data is stored.
    compressionType string
    Output compression type. Valid values: GZIP, NONE.
    kmsKeyId string
    KMS key ID used to encrypt the output data.
    s3_output_path str
    S3 URI where output data is stored.
    compression_type str
    Output compression type. Valid values: GZIP, NONE.
    kms_key_id str
    KMS key ID used to encrypt the output data.
    s3OutputPath String
    S3 URI where output data is stored.
    compressionType String
    Output compression type. Valid values: GZIP, NONE.
    kmsKeyId String
    KMS key ID used to encrypt the output data.

    TrainingJobProfilerConfig, TrainingJobProfilerConfigArgs

    DisableProfiler bool
    Whether to disable the profiler.
    ProfilingIntervalInMilliseconds int
    Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
    ProfilingParameters Dictionary<string, string>
    Map of profiling parameters. Maximum of 20 entries.
    S3OutputPath string
    S3 URI where profiler output is stored.
    DisableProfiler bool
    Whether to disable the profiler.
    ProfilingIntervalInMilliseconds int
    Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
    ProfilingParameters map[string]string
    Map of profiling parameters. Maximum of 20 entries.
    S3OutputPath string
    S3 URI where profiler output is stored.
    disableProfiler Boolean
    Whether to disable the profiler.
    profilingIntervalInMilliseconds Integer
    Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
    profilingParameters Map<String,String>
    Map of profiling parameters. Maximum of 20 entries.
    s3OutputPath String
    S3 URI where profiler output is stored.
    disableProfiler boolean
    Whether to disable the profiler.
    profilingIntervalInMilliseconds number
    Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
    profilingParameters {[key: string]: string}
    Map of profiling parameters. Maximum of 20 entries.
    s3OutputPath string
    S3 URI where profiler output is stored.
    disable_profiler bool
    Whether to disable the profiler.
    profiling_interval_in_milliseconds int
    Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
    profiling_parameters Mapping[str, str]
    Map of profiling parameters. Maximum of 20 entries.
    s3_output_path str
    S3 URI where profiler output is stored.
    disableProfiler Boolean
    Whether to disable the profiler.
    profilingIntervalInMilliseconds Number
    Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
    profilingParameters Map<String>
    Map of profiling parameters. Maximum of 20 entries.
    s3OutputPath String
    S3 URI where profiler output is stored.

    TrainingJobProfilerRuleConfiguration, TrainingJobProfilerRuleConfigurationArgs

    RuleConfigurationName string
    Name of the profiler rule configuration. Must be between 1 and 256 characters.
    RuleEvaluatorImage string
    Docker image URI for the profiler rule evaluator.
    InstanceType string
    Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
    LocalPath string
    Local path where profiler rule output is written.
    RuleParameters Dictionary<string, string>
    Map of parameters for the profiler rule. Maximum of 100 entries.
    S3OutputPath string
    S3 URI where profiler rule output is stored.
    VolumeSizeInGb int
    Size of the storage volume for the profiler rule evaluator, in GB.
    RuleConfigurationName string
    Name of the profiler rule configuration. Must be between 1 and 256 characters.
    RuleEvaluatorImage string
    Docker image URI for the profiler rule evaluator.
    InstanceType string
    Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
    LocalPath string
    Local path where profiler rule output is written.
    RuleParameters map[string]string
    Map of parameters for the profiler rule. Maximum of 100 entries.
    S3OutputPath string
    S3 URI where profiler rule output is stored.
    VolumeSizeInGb int
    Size of the storage volume for the profiler rule evaluator, in GB.
    ruleConfigurationName String
    Name of the profiler rule configuration. Must be between 1 and 256 characters.
    ruleEvaluatorImage String
    Docker image URI for the profiler rule evaluator.
    instanceType String
    Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
    localPath String
    Local path where profiler rule output is written.
    ruleParameters Map<String,String>
    Map of parameters for the profiler rule. Maximum of 100 entries.
    s3OutputPath String
    S3 URI where profiler rule output is stored.
    volumeSizeInGb Integer
    Size of the storage volume for the profiler rule evaluator, in GB.
    ruleConfigurationName string
    Name of the profiler rule configuration. Must be between 1 and 256 characters.
    ruleEvaluatorImage string
    Docker image URI for the profiler rule evaluator.
    instanceType string
    Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
    localPath string
    Local path where profiler rule output is written.
    ruleParameters {[key: string]: string}
    Map of parameters for the profiler rule. Maximum of 100 entries.
    s3OutputPath string
    S3 URI where profiler rule output is stored.
    volumeSizeInGb number
    Size of the storage volume for the profiler rule evaluator, in GB.
    rule_configuration_name str
    Name of the profiler rule configuration. Must be between 1 and 256 characters.
    rule_evaluator_image str
    Docker image URI for the profiler rule evaluator.
    instance_type str
    Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
    local_path str
    Local path where profiler rule output is written.
    rule_parameters Mapping[str, str]
    Map of parameters for the profiler rule. Maximum of 100 entries.
    s3_output_path str
    S3 URI where profiler rule output is stored.
    volume_size_in_gb int
    Size of the storage volume for the profiler rule evaluator, in GB.
    ruleConfigurationName String
    Name of the profiler rule configuration. Must be between 1 and 256 characters.
    ruleEvaluatorImage String
    Docker image URI for the profiler rule evaluator.
    instanceType String
    Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
    localPath String
    Local path where profiler rule output is written.
    ruleParameters Map<String>
    Map of parameters for the profiler rule. Maximum of 100 entries.
    s3OutputPath String
    S3 URI where profiler rule output is stored.
    volumeSizeInGb Number
    Size of the storage volume for the profiler rule evaluator, in GB.

    TrainingJobRemoteDebugConfig, TrainingJobRemoteDebugConfigArgs

    EnableRemoteDebug bool
    Whether to enable remote debugging for the training job.
    EnableRemoteDebug bool
    Whether to enable remote debugging for the training job.
    enableRemoteDebug Boolean
    Whether to enable remote debugging for the training job.
    enableRemoteDebug boolean
    Whether to enable remote debugging for the training job.
    enable_remote_debug bool
    Whether to enable remote debugging for the training job.
    enableRemoteDebug Boolean
    Whether to enable remote debugging for the training job.

    TrainingJobResourceConfig, TrainingJobResourceConfigArgs

    InstanceCount int
    Number of ML compute instances to use. Conflicts with instanceGroups.
    InstanceGroups List<TrainingJobResourceConfigInstanceGroup>
    List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
    InstancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig
    Instance placement configuration. See instancePlacementConfig below.
    InstanceType string
    ML compute instance type. Conflicts with instanceGroups.
    KeepAlivePeriodInSeconds int
    Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
    TrainingPlanArn string
    ARN of the training plan to use.
    VolumeKmsKeyId string
    KMS key ID used to encrypt data on the storage volume.
    VolumeSizeInGb int
    Size of the storage volume attached to each instance, in GB.
    InstanceCount int
    Number of ML compute instances to use. Conflicts with instanceGroups.
    InstanceGroups []TrainingJobResourceConfigInstanceGroup
    List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
    InstancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig
    Instance placement configuration. See instancePlacementConfig below.
    InstanceType string
    ML compute instance type. Conflicts with instanceGroups.
    KeepAlivePeriodInSeconds int
    Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
    TrainingPlanArn string
    ARN of the training plan to use.
    VolumeKmsKeyId string
    KMS key ID used to encrypt data on the storage volume.
    VolumeSizeInGb int
    Size of the storage volume attached to each instance, in GB.
    instanceCount Integer
    Number of ML compute instances to use. Conflicts with instanceGroups.
    instanceGroups List<TrainingJobResourceConfigInstanceGroup>
    List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
    instancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig
    Instance placement configuration. See instancePlacementConfig below.
    instanceType String
    ML compute instance type. Conflicts with instanceGroups.
    keepAlivePeriodInSeconds Integer
    Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
    trainingPlanArn String
    ARN of the training plan to use.
    volumeKmsKeyId String
    KMS key ID used to encrypt data on the storage volume.
    volumeSizeInGb Integer
    Size of the storage volume attached to each instance, in GB.
    instanceCount number
    Number of ML compute instances to use. Conflicts with instanceGroups.
    instanceGroups TrainingJobResourceConfigInstanceGroup[]
    List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
    instancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig
    Instance placement configuration. See instancePlacementConfig below.
    instanceType string
    ML compute instance type. Conflicts with instanceGroups.
    keepAlivePeriodInSeconds number
    Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
    trainingPlanArn string
    ARN of the training plan to use.
    volumeKmsKeyId string
    KMS key ID used to encrypt data on the storage volume.
    volumeSizeInGb number
    Size of the storage volume attached to each instance, in GB.
    instance_count int
    Number of ML compute instances to use. Conflicts with instanceGroups.
    instance_groups Sequence[TrainingJobResourceConfigInstanceGroup]
    List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
    instance_placement_config TrainingJobResourceConfigInstancePlacementConfig
    Instance placement configuration. See instancePlacementConfig below.
    instance_type str
    ML compute instance type. Conflicts with instanceGroups.
    keep_alive_period_in_seconds int
    Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
    training_plan_arn str
    ARN of the training plan to use.
    volume_kms_key_id str
    KMS key ID used to encrypt data on the storage volume.
    volume_size_in_gb int
    Size of the storage volume attached to each instance, in GB.
    instanceCount Number
    Number of ML compute instances to use. Conflicts with instanceGroups.
    instanceGroups List<Property Map>
    List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
    instancePlacementConfig Property Map
    Instance placement configuration. See instancePlacementConfig below.
    instanceType String
    ML compute instance type. Conflicts with instanceGroups.
    keepAlivePeriodInSeconds Number
    Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
    trainingPlanArn String
    ARN of the training plan to use.
    volumeKmsKeyId String
    KMS key ID used to encrypt data on the storage volume.
    volumeSizeInGb Number
    Size of the storage volume attached to each instance, in GB.

    TrainingJobResourceConfigInstanceGroup, TrainingJobResourceConfigInstanceGroupArgs

    InstanceCount int
    Number of instances in the group.
    InstanceGroupName string
    Name of the instance group.
    InstanceType string
    ML compute instance type for the group.
    InstanceCount int
    Number of instances in the group.
    InstanceGroupName string
    Name of the instance group.
    InstanceType string
    ML compute instance type for the group.
    instanceCount Integer
    Number of instances in the group.
    instanceGroupName String
    Name of the instance group.
    instanceType String
    ML compute instance type for the group.
    instanceCount number
    Number of instances in the group.
    instanceGroupName string
    Name of the instance group.
    instanceType string
    ML compute instance type for the group.
    instance_count int
    Number of instances in the group.
    instance_group_name str
    Name of the instance group.
    instance_type str
    ML compute instance type for the group.
    instanceCount Number
    Number of instances in the group.
    instanceGroupName String
    Name of the instance group.
    instanceType String
    ML compute instance type for the group.

    TrainingJobResourceConfigInstancePlacementConfig, TrainingJobResourceConfigInstancePlacementConfigArgs

    EnableMultipleJobs bool
    Whether to enable multiple jobs on the same instance.
    PlacementSpecifications List<TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification>
    Placement specifications for instance placement. See placementSpecifications below.
    EnableMultipleJobs bool
    Whether to enable multiple jobs on the same instance.
    PlacementSpecifications []TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification
    Placement specifications for instance placement. See placementSpecifications below.
    enableMultipleJobs Boolean
    Whether to enable multiple jobs on the same instance.
    placementSpecifications List<TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification>
    Placement specifications for instance placement. See placementSpecifications below.
    enableMultipleJobs boolean
    Whether to enable multiple jobs on the same instance.
    placementSpecifications TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification[]
    Placement specifications for instance placement. See placementSpecifications below.
    enable_multiple_jobs bool
    Whether to enable multiple jobs on the same instance.
    placement_specifications Sequence[TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification]
    Placement specifications for instance placement. See placementSpecifications below.
    enableMultipleJobs Boolean
    Whether to enable multiple jobs on the same instance.
    placementSpecifications List<Property Map>
    Placement specifications for instance placement. See placementSpecifications below.

    TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification, TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs

    InstanceCount int
    Number of instances in the placement.
    UltraServerId string
    Ultra server ID for the placement.
    InstanceCount int
    Number of instances in the placement.
    UltraServerId string
    Ultra server ID for the placement.
    instanceCount Integer
    Number of instances in the placement.
    ultraServerId String
    Ultra server ID for the placement.
    instanceCount number
    Number of instances in the placement.
    ultraServerId string
    Ultra server ID for the placement.
    instance_count int
    Number of instances in the placement.
    ultra_server_id str
    Ultra server ID for the placement.
    instanceCount Number
    Number of instances in the placement.
    ultraServerId String
    Ultra server ID for the placement.

    TrainingJobRetryStrategy, TrainingJobRetryStrategyArgs

    MaximumRetryAttempts int
    Maximum number of retry attempts. Valid values: 1–30.
    MaximumRetryAttempts int
    Maximum number of retry attempts. Valid values: 1–30.
    maximumRetryAttempts Integer
    Maximum number of retry attempts. Valid values: 1–30.
    maximumRetryAttempts number
    Maximum number of retry attempts. Valid values: 1–30.
    maximum_retry_attempts int
    Maximum number of retry attempts. Valid values: 1–30.
    maximumRetryAttempts Number
    Maximum number of retry attempts. Valid values: 1–30.

    TrainingJobServerlessJobConfig, TrainingJobServerlessJobConfigArgs

    BaseModelArn string
    ARN of the base foundation model from the SageMaker AI Public Hub.
    JobType string
    Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
    AcceptEula bool
    Whether to accept the model EULA.
    CustomizationTechnique string
    Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
    EvaluationType string
    Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
    EvaluatorArn string
    ARN of the evaluator.
    Peft string
    Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.
    BaseModelArn string
    ARN of the base foundation model from the SageMaker AI Public Hub.
    JobType string
    Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
    AcceptEula bool
    Whether to accept the model EULA.
    CustomizationTechnique string
    Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
    EvaluationType string
    Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
    EvaluatorArn string
    ARN of the evaluator.
    Peft string
    Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.
    baseModelArn String
    ARN of the base foundation model from the SageMaker AI Public Hub.
    jobType String
    Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
    acceptEula Boolean
    Whether to accept the model EULA.
    customizationTechnique String
    Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
    evaluationType String
    Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
    evaluatorArn String
    ARN of the evaluator.
    peft String
    Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.
    baseModelArn string
    ARN of the base foundation model from the SageMaker AI Public Hub.
    jobType string
    Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
    acceptEula boolean
    Whether to accept the model EULA.
    customizationTechnique string
    Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
    evaluationType string
    Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
    evaluatorArn string
    ARN of the evaluator.
    peft string
    Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.
    base_model_arn str
    ARN of the base foundation model from the SageMaker AI Public Hub.
    job_type str
    Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
    accept_eula bool
    Whether to accept the model EULA.
    customization_technique str
    Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
    evaluation_type str
    Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
    evaluator_arn str
    ARN of the evaluator.
    peft str
    Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.
    baseModelArn String
    ARN of the base foundation model from the SageMaker AI Public Hub.
    jobType String
    Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
    acceptEula Boolean
    Whether to accept the model EULA.
    customizationTechnique String
    Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
    evaluationType String
    Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
    evaluatorArn String
    ARN of the evaluator.
    peft String
    Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.

    TrainingJobSessionChainingConfig, TrainingJobSessionChainingConfigArgs

    EnableSessionTagChaining bool
    Whether to enable session tag chaining for the training job.
    EnableSessionTagChaining bool
    Whether to enable session tag chaining for the training job.
    enableSessionTagChaining Boolean
    Whether to enable session tag chaining for the training job.
    enableSessionTagChaining boolean
    Whether to enable session tag chaining for the training job.
    enable_session_tag_chaining bool
    Whether to enable session tag chaining for the training job.
    enableSessionTagChaining Boolean
    Whether to enable session tag chaining for the training job.

    TrainingJobStoppingCondition, TrainingJobStoppingConditionArgs

    MaxPendingTimeInSeconds int
    Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
    MaxRuntimeInSeconds int
    Maximum time in seconds the training job can run before it is stopped.
    MaxWaitTimeInSeconds int
    Maximum time in seconds to wait for a managed spot training job to complete.
    MaxPendingTimeInSeconds int
    Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
    MaxRuntimeInSeconds int
    Maximum time in seconds the training job can run before it is stopped.
    MaxWaitTimeInSeconds int
    Maximum time in seconds to wait for a managed spot training job to complete.
    maxPendingTimeInSeconds Integer
    Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
    maxRuntimeInSeconds Integer
    Maximum time in seconds the training job can run before it is stopped.
    maxWaitTimeInSeconds Integer
    Maximum time in seconds to wait for a managed spot training job to complete.
    maxPendingTimeInSeconds number
    Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
    maxRuntimeInSeconds number
    Maximum time in seconds the training job can run before it is stopped.
    maxWaitTimeInSeconds number
    Maximum time in seconds to wait for a managed spot training job to complete.
    max_pending_time_in_seconds int
    Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
    max_runtime_in_seconds int
    Maximum time in seconds the training job can run before it is stopped.
    max_wait_time_in_seconds int
    Maximum time in seconds to wait for a managed spot training job to complete.
    maxPendingTimeInSeconds Number
    Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
    maxRuntimeInSeconds Number
    Maximum time in seconds the training job can run before it is stopped.
    maxWaitTimeInSeconds Number
    Maximum time in seconds to wait for a managed spot training job to complete.

    TrainingJobTensorBoardOutputConfig, TrainingJobTensorBoardOutputConfigArgs

    S3OutputPath string
    S3 URI where TensorBoard output is stored.
    LocalPath string
    Local path where TensorBoard output is written.
    S3OutputPath string
    S3 URI where TensorBoard output is stored.
    LocalPath string
    Local path where TensorBoard output is written.
    s3OutputPath String
    S3 URI where TensorBoard output is stored.
    localPath String
    Local path where TensorBoard output is written.
    s3OutputPath string
    S3 URI where TensorBoard output is stored.
    localPath string
    Local path where TensorBoard output is written.
    s3_output_path str
    S3 URI where TensorBoard output is stored.
    local_path str
    Local path where TensorBoard output is written.
    s3OutputPath String
    S3 URI where TensorBoard output is stored.
    localPath String
    Local path where TensorBoard output is written.

    TrainingJobTimeouts, TrainingJobTimeoutsArgs

    Create string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    Delete string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
    Update string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    Create string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    Delete string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
    Update string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    create String
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    delete String
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
    update String
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    create string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    delete string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
    update string
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    create str
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    delete str
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
    update str
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    create String
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
    delete String
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
    update String
    A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

    TrainingJobVpcConfig, TrainingJobVpcConfigArgs

    SecurityGroupIds List<string>
    List of VPC security group IDs. Maximum of 5.
    Subnets List<string>
    List of subnet IDs. Maximum of 16.
    SecurityGroupIds []string
    List of VPC security group IDs. Maximum of 5.
    Subnets []string
    List of subnet IDs. Maximum of 16.
    securityGroupIds List<String>
    List of VPC security group IDs. Maximum of 5.
    subnets List<String>
    List of subnet IDs. Maximum of 16.
    securityGroupIds string[]
    List of VPC security group IDs. Maximum of 5.
    subnets string[]
    List of subnet IDs. Maximum of 16.
    security_group_ids Sequence[str]
    List of VPC security group IDs. Maximum of 5.
    subnets Sequence[str]
    List of subnet IDs. Maximum of 16.
    securityGroupIds List<String>
    List of VPC security group IDs. Maximum of 5.
    subnets List<String>
    List of subnet IDs. Maximum of 16.

    Import

    Identity Schema

    Required

    • trainingJobName (String) Name of the Training Job.

    Optional

    • accountId (String) AWS Account where this resource is managed.
    • region (String) Region where this resource is managed.

    Use pulumi import to import a SageMaker AI Training Job by its trainingJobName. For example:

    $ pulumi import aws:sagemaker/trainingJob:TrainingJob example my-training-job
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    AWS Classic pulumi/pulumi-aws
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the aws Terraform Provider.
    aws logo
    Viewing docs for AWS v7.28.0
    published on Thursday, Apr 30, 2026 by Pulumi
      Try Pulumi Cloud free. Your team will thank you.