TrainingJob

AWS v7.28.0, Apr 30, 2026

Viewing docs for AWS v7.28.0
published on Thursday, Apr 30, 2026 by Pulumi

Schema (JSON)

pulumi/pulumi-aws

Viewing docs for AWS v7.28.0
published on Thursday, Apr 30, 2026 by Pulumi

Schema (JSON)

pulumi/pulumi-aws

Example Usage

Basic Usage

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Minimal SageMaker training job: container image, execution role, output
// location, compute resources, and a runtime limit.
const example = new aws.sagemaker.TrainingJob("example", {
    trainingJobName: "example",
    roleArn: exampleAwsIamRole.arn,
    algorithmSpecification: {
        trainingInputMode: "File",
        trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
    },
    outputDataConfig: {
        // pulumi.interpolate waits for Output values before building the URI;
        // a plain template literal would stringify an unresolved Output.
        s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
    },
    resourceConfig: {
        instanceType: "ml.m5.large",
        instanceCount: 1,
        volumeSizeInGb: 30,
    },
    stoppingCondition: {
        // 3600 seconds = 1 hour.
        maxRuntimeInSeconds: 3600,
    },
});

import pulumi
import pulumi_aws as aws

# Minimal SageMaker training job: container image, execution role, output
# location, compute resources, and a runtime limit.
example = aws.sagemaker.TrainingJob("example",
    training_job_name="example",
    role_arn=example_aws_iam_role["arn"],
    algorithm_specification={
        "training_input_mode": "File",
        "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
    },
    output_data_config={
        # Output.concat accepts plain strings and Outputs alike; an f-string
        # would not wait for an Output to resolve.
        "s3_output_path": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/output/"),
    },
    resource_config={
        "instance_type": "ml.m5.large",
        "instance_count": 1,
        "volume_size_in_gb": 30,
    },
    stopping_condition={
        # 3600 seconds = 1 hour.
        "max_runtime_in_seconds": 3600,
    })

package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a single SageMaker training job
// with the minimal set of arguments.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Assemble the job arguments first so the constructor call stays short.
		jobArgs := &sagemaker.TrainingJobArgs{
			TrainingJobName: pulumi.String("example"),
			RoleArn:         pulumi.Any(exampleAwsIamRole.Arn),
			AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
				TrainingInputMode: pulumi.String("File"),
				TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
			},
			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
				S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
			},
			ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
				InstanceType:   pulumi.String("ml.m5.large"),
				InstanceCount:  pulumi.Int(1),
				VolumeSizeInGb: pulumi.Int(30),
			},
			StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
				MaxRuntimeInSeconds: pulumi.Int(3600),
			},
		}

		// The resource handle is unused; only the registration error matters.
		_, err := sagemaker.NewTrainingJob(ctx, "example", jobArgs)
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;

// Minimal SageMaker training job: container image, execution role, output
// location, compute resources, and a runtime limit.
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Sagemaker.TrainingJob("example", new()
    {
        TrainingJobName = "example",
        RoleArn = exampleAwsIamRole.Arn,
        AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
        {
            TrainingInputMode = "File",
            TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
        },
        OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
        {
            // Output.Format waits for Output values inside the interpolated
            // string; a bare $"..." would call ToString() on the Output itself.
            S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
        },
        ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
        {
            InstanceType = "ml.m5.large",
            InstanceCount = 1,
            VolumeSizeInGb = 30,
        },
        StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
        {
            // 3600 seconds = 1 hour.
            MaxRuntimeInSeconds = 3600,
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.TrainingJob;
import com.pulumi.aws.sagemaker.TrainingJobArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program declaring a minimal SageMaker training job.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the training job with its image, role, output location,
     * compute resources, and runtime limit.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        var example = new TrainingJob("example", TrainingJobArgs.builder()
            .trainingJobName("example")
            .roleArn(exampleAwsIamRole.arn())
            .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                .trainingInputMode("File")
                .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                .build())
            .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                // Output.format waits for Output arguments to resolve;
                // String.format would embed the Output object's toString().
                .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                .build())
            .resourceConfig(TrainingJobResourceConfigArgs.builder()
                .instanceType("ml.m5.large")
                .instanceCount(1)
                .volumeSizeInGb(30)
                .build())
            .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                // 3600 seconds = 1 hour.
                .maxRuntimeInSeconds(3600)
                .build())
            .build());

    }
}

# Minimal SageMaker training job: container image, execution role, output
# location, compute resources, and a runtime limit.
resources:
  example:
    type: aws:sagemaker:TrainingJob
    properties:
      trainingJobName: example
      roleArn: ${exampleAwsIamRole.arn}
      algorithmSpecification:
        trainingInputMode: File
        trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
      outputDataConfig:
        # The bucket name is interpolated into the S3 URI.
        s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
      resourceConfig:
        instanceType: ml.m5.large
        instanceCount: 1
        volumeSizeInGb: 30
      stoppingCondition:
        # 3600 seconds = 1 hour.
        maxRuntimeInSeconds: 3600

With VPC Configuration

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Training job attached to a VPC through a security group and a subnet.
const example = new aws.sagemaker.TrainingJob("example", {
    trainingJobName: "example",
    roleArn: exampleAwsIamRole.arn,
    algorithmSpecification: {
        trainingInputMode: "File",
        trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
    },
    outputDataConfig: {
        // pulumi.interpolate waits for Output values before building the URI;
        // a plain template literal would stringify an unresolved Output.
        s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
    },
    resourceConfig: {
        instanceType: "ml.m5.large",
        instanceCount: 1,
        volumeSizeInGb: 30,
    },
    stoppingCondition: {
        maxRuntimeInSeconds: 3600,
    },
    // Network placement for the training containers.
    vpcConfig: {
        securityGroupIds: [exampleAwsSecurityGroup.id],
        subnets: [exampleAwsSubnet.id],
    },
});

import pulumi
import pulumi_aws as aws

# Training job attached to a VPC through a security group and a subnet.
example = aws.sagemaker.TrainingJob("example",
    training_job_name="example",
    role_arn=example_aws_iam_role["arn"],
    algorithm_specification={
        "training_input_mode": "File",
        "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
    },
    output_data_config={
        # Output.concat accepts plain strings and Outputs alike; an f-string
        # would not wait for an Output to resolve.
        "s3_output_path": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/output/"),
    },
    resource_config={
        "instance_type": "ml.m5.large",
        "instance_count": 1,
        "volume_size_in_gb": 30,
    },
    stopping_condition={
        "max_runtime_in_seconds": 3600,
    },
    # Network placement for the training containers.
    vpc_config={
        "security_group_ids": [example_aws_security_group["id"]],
        "subnets": [example_aws_subnet["id"]],
    })

package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a SageMaker training job attached
// to a VPC through one security group and one subnet.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Network placement, kept separate from the rest of the job arguments.
		network := &sagemaker.TrainingJobVpcConfigArgs{
			SecurityGroupIds: pulumi.StringArray{
				exampleAwsSecurityGroup.Id,
			},
			Subnets: pulumi.StringArray{
				exampleAwsSubnet.Id,
			},
		}

		jobArgs := &sagemaker.TrainingJobArgs{
			TrainingJobName: pulumi.String("example"),
			RoleArn:         pulumi.Any(exampleAwsIamRole.Arn),
			AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
				TrainingInputMode: pulumi.String("File"),
				TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
			},
			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
				S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
			},
			ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
				InstanceType:   pulumi.String("ml.m5.large"),
				InstanceCount:  pulumi.Int(1),
				VolumeSizeInGb: pulumi.Int(30),
			},
			StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
				MaxRuntimeInSeconds: pulumi.Int(3600),
			},
			VpcConfig: network,
		}

		// The resource handle is unused; only the registration error matters.
		_, err := sagemaker.NewTrainingJob(ctx, "example", jobArgs)
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;

// Training job attached to a VPC through a security group and a subnet.
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Sagemaker.TrainingJob("example", new()
    {
        TrainingJobName = "example",
        RoleArn = exampleAwsIamRole.Arn,
        AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
        {
            TrainingInputMode = "File",
            TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
        },
        OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
        {
            // Output.Format waits for Output values inside the interpolated
            // string; a bare $"..." would call ToString() on the Output itself.
            S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
        },
        ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
        {
            InstanceType = "ml.m5.large",
            InstanceCount = 1,
            VolumeSizeInGb = 30,
        },
        StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
        {
            MaxRuntimeInSeconds = 3600,
        },
        // Network placement for the training containers.
        VpcConfig = new Aws.Sagemaker.Inputs.TrainingJobVpcConfigArgs
        {
            SecurityGroupIds = new[]
            {
                exampleAwsSecurityGroup.Id,
            },
            Subnets = new[]
            {
                exampleAwsSubnet.Id,
            },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.TrainingJob;
import com.pulumi.aws.sagemaker.TrainingJobArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobVpcConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program declaring a SageMaker training job attached to a VPC.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the training job, including its security group and subnet.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        var example = new TrainingJob("example", TrainingJobArgs.builder()
            .trainingJobName("example")
            .roleArn(exampleAwsIamRole.arn())
            .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                .trainingInputMode("File")
                .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                .build())
            .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                // Output.format waits for Output arguments to resolve;
                // String.format would embed the Output object's toString().
                .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                .build())
            .resourceConfig(TrainingJobResourceConfigArgs.builder()
                .instanceType("ml.m5.large")
                .instanceCount(1)
                .volumeSizeInGb(30)
                .build())
            .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                .maxRuntimeInSeconds(3600)
                .build())
            // Network placement for the training containers.
            .vpcConfig(TrainingJobVpcConfigArgs.builder()
                .securityGroupIds(exampleAwsSecurityGroup.id())
                .subnets(exampleAwsSubnet.id())
                .build())
            .build());

    }
}

# Training job attached to a VPC through a security group and a subnet.
resources:
  example:
    type: aws:sagemaker:TrainingJob
    properties:
      trainingJobName: example
      roleArn: ${exampleAwsIamRole.arn}
      algorithmSpecification:
        trainingInputMode: File
        trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
      outputDataConfig:
        s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
      resourceConfig:
        instanceType: ml.m5.large
        instanceCount: 1
        volumeSizeInGb: 30
      stoppingCondition:
        maxRuntimeInSeconds: 3600
      # Network placement for the training containers.
      vpcConfig:
        securityGroupIds:
          - ${exampleAwsSecurityGroup.id}
        subnets:
          - ${exampleAwsSubnet.id}

With Input Data and Hyperparameters

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Training job with hyperparameters and a single "train" input channel read
// from an S3 prefix.
const example = new aws.sagemaker.TrainingJob("example", {
    trainingJobName: "example",
    roleArn: exampleAwsIamRole.arn,
    algorithmSpecification: {
        trainingInputMode: "File",
        trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
        enableSagemakerMetricsTimeSeries: true,
    },
    // Hyperparameter values are passed as strings.
    hyperParameters: {
        mini_batch_size: "200",
        epochs: "10",
    },
    inputDataConfigs: [{
        channelName: "train",
        dataSource: {
            s3DataSource: {
                s3DataType: "S3Prefix",
                // pulumi.interpolate waits for Output values before building
                // the URI; a plain template literal would not.
                s3Uri: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/train/`,
            },
        },
    }],
    outputDataConfig: {
        s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
    },
    resourceConfig: {
        instanceType: "ml.m5.large",
        instanceCount: 1,
        volumeSizeInGb: 30,
    },
    stoppingCondition: {
        maxRuntimeInSeconds: 3600,
    },
});

import pulumi
import pulumi_aws as aws

# Training job with hyperparameters and a single "train" input channel read
# from an S3 prefix.
example = aws.sagemaker.TrainingJob("example",
    training_job_name="example",
    role_arn=example_aws_iam_role["arn"],
    algorithm_specification={
        "training_input_mode": "File",
        "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
        "enable_sagemaker_metrics_time_series": True,
    },
    # Hyperparameter values are passed as strings.
    hyper_parameters={
        "mini_batch_size": "200",
        "epochs": "10",
    },
    input_data_configs=[{
        "channel_name": "train",
        "data_source": {
            "s3_data_source": {
                "s3_data_type": "S3Prefix",
                # Output.concat accepts plain strings and Outputs alike; an
                # f-string would not wait for an Output to resolve.
                "s3_uri": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/train/"),
            },
        },
    }],
    output_data_config={
        "s3_output_path": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/output/"),
    },
    resource_config={
        "instance_type": "ml.m5.large",
        "instance_count": 1,
        "volume_size_in_gb": 30,
    },
    stopping_condition={
        "max_runtime_in_seconds": 3600,
    })

package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a SageMaker training job with
// hyperparameters and one "train" input channel read from an S3 prefix.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// The single input channel, built up front to keep the argument
		// struct below readable.
		trainChannel := &sagemaker.TrainingJobInputDataConfigArgs{
			ChannelName: pulumi.String("train"),
			DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceArgs{
				S3DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceArgs{
					S3DataType: pulumi.String("S3Prefix"),
					S3Uri:      pulumi.Sprintf("s3://%v/train/", exampleAwsS3Bucket.Bucket),
				},
			},
		}

		jobArgs := &sagemaker.TrainingJobArgs{
			TrainingJobName: pulumi.String("example"),
			RoleArn:         pulumi.Any(exampleAwsIamRole.Arn),
			AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
				TrainingInputMode:                pulumi.String("File"),
				TrainingImage:                    pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
				EnableSagemakerMetricsTimeSeries: pulumi.Bool(true),
			},
			// Hyperparameter values are passed as strings.
			HyperParameters: pulumi.StringMap{
				"mini_batch_size": pulumi.String("200"),
				"epochs":          pulumi.String("10"),
			},
			InputDataConfigs: sagemaker.TrainingJobInputDataConfigArray{
				trainChannel,
			},
			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
				S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
			},
			ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
				InstanceType:   pulumi.String("ml.m5.large"),
				InstanceCount:  pulumi.Int(1),
				VolumeSizeInGb: pulumi.Int(30),
			},
			StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
				MaxRuntimeInSeconds: pulumi.Int(3600),
			},
		}

		// The resource handle is unused; only the registration error matters.
		_, err := sagemaker.NewTrainingJob(ctx, "example", jobArgs)
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;

// Training job with hyperparameters and a single "train" input channel read
// from an S3 prefix.
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Sagemaker.TrainingJob("example", new()
    {
        TrainingJobName = "example",
        RoleArn = exampleAwsIamRole.Arn,
        AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
        {
            TrainingInputMode = "File",
            TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
            EnableSagemakerMetricsTimeSeries = true,
        },
        // Hyperparameter values are passed as strings.
        HyperParameters = 
        {
            { "mini_batch_size", "200" },
            { "epochs", "10" },
        },
        InputDataConfigs = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
            {
                ChannelName = "train",
                DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
                {
                    S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
                    {
                        S3DataType = "S3Prefix",
                        // Output.Format waits for Output values inside the
                        // interpolated string; a bare $"..." would not.
                        S3Uri = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/train/"),
                    },
                },
            },
        },
        OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
        {
            S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
        },
        ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
        {
            InstanceType = "ml.m5.large",
            InstanceCount = 1,
            VolumeSizeInGb = 30,
        },
        StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
        {
            MaxRuntimeInSeconds = 3600,
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.TrainingJob;
import com.pulumi.aws.sagemaker.TrainingJobArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program declaring a SageMaker training job with hyperparameters and
 * one "train" input channel.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the training job, its hyperparameters, and its S3 input channel.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        var example = new TrainingJob("example", TrainingJobArgs.builder()
            .trainingJobName("example")
            .roleArn(exampleAwsIamRole.arn())
            .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                .trainingInputMode("File")
                .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                .enableSagemakerMetricsTimeSeries(true)
                .build())
            // Hyperparameter values are passed as strings.
            .hyperParameters(Map.ofEntries(
                Map.entry("mini_batch_size", "200"),
                Map.entry("epochs", "10")
            ))
            .inputDataConfigs(TrainingJobInputDataConfigArgs.builder()
                .channelName("train")
                .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
                    .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                        .s3DataType("S3Prefix")
                        // Output.format waits for Output arguments to resolve;
                        // String.format would embed the Output's toString().
                        .s3Uri(Output.format("s3://%s/train/", exampleAwsS3Bucket.bucket()))
                        .build())
                    .build())
                .build())
            .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                .build())
            .resourceConfig(TrainingJobResourceConfigArgs.builder()
                .instanceType("ml.m5.large")
                .instanceCount(1)
                .volumeSizeInGb(30)
                .build())
            .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                .maxRuntimeInSeconds(3600)
                .build())
            .build());

    }
}

# Training job with hyperparameters and a single "train" input channel read
# from an S3 prefix.
resources:
  example:
    type: aws:sagemaker:TrainingJob
    properties:
      trainingJobName: example
      roleArn: ${exampleAwsIamRole.arn}
      algorithmSpecification:
        trainingInputMode: File
        trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
        enableSagemakerMetricsTimeSeries: true
      # Values are quoted so they stay strings rather than YAML integers.
      hyperParameters:
        mini_batch_size: '200'
        epochs: '10'
      inputDataConfigs:
        - channelName: train
          dataSource:
            s3DataSource:
              s3DataType: S3Prefix
              s3Uri: s3://${exampleAwsS3Bucket.bucket}/train/
      outputDataConfig:
        s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
      resourceConfig:
        instanceType: ml.m5.large
        instanceCount: 1
        volumeSizeInGb: 30
      stoppingCondition:
        maxRuntimeInSeconds: 3600

With Encrypted Output, Checkpoints, and TensorBoard

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Training job with KMS-encrypted output and volume, checkpointing to S3, and
// TensorBoard output.
const example = new aws.sagemaker.TrainingJob("example", {
    trainingJobName: "example",
    roleArn: exampleAwsIamRole.arn,
    algorithmSpecification: {
        trainingInputMode: "File",
        trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
    },
    checkpointConfig: {
        localPath: "/opt/ml/checkpoints",
        // pulumi.interpolate waits for Output values before building the URI;
        // a plain template literal would stringify an unresolved Output.
        s3Uri: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/checkpoints/`,
    },
    outputDataConfig: {
        compressionType: "GZIP",
        kmsKeyId: exampleAwsKmsKey.arn,
        s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/output/`,
    },
    resourceConfig: {
        instanceType: "ml.m5.large",
        instanceCount: 1,
        volumeSizeInGb: 30,
        // Same KMS key as the output data config above.
        volumeKmsKeyId: exampleAwsKmsKey.arn,
    },
    stoppingCondition: {
        maxRuntimeInSeconds: 3600,
    },
    tensorBoardOutputConfig: {
        localPath: "/opt/ml/output/tensorboard",
        s3OutputPath: pulumi.interpolate`s3://${exampleAwsS3Bucket.bucket}/tensorboard/`,
    },
});

import pulumi
import pulumi_aws as aws

# Training job with KMS-encrypted output and volume, checkpointing to S3, and
# TensorBoard output.
example = aws.sagemaker.TrainingJob("example",
    training_job_name="example",
    role_arn=example_aws_iam_role["arn"],
    algorithm_specification={
        "training_input_mode": "File",
        "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
    },
    checkpoint_config={
        "local_path": "/opt/ml/checkpoints",
        # Output.concat accepts plain strings and Outputs alike; an f-string
        # would not wait for an Output to resolve.
        "s3_uri": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/checkpoints/"),
    },
    output_data_config={
        "compression_type": "GZIP",
        "kms_key_id": example_aws_kms_key["arn"],
        "s3_output_path": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/output/"),
    },
    resource_config={
        "instance_type": "ml.m5.large",
        "instance_count": 1,
        "volume_size_in_gb": 30,
        # Same KMS key as the output data config above.
        "volume_kms_key_id": example_aws_kms_key["arn"],
    },
    stopping_condition={
        "max_runtime_in_seconds": 3600,
    },
    tensor_board_output_config={
        "local_path": "/opt/ml/output/tensorboard",
        "s3_output_path": pulumi.Output.concat("s3://", example_aws_s3_bucket["bucket"], "/tensorboard/"),
    })

package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a SageMaker training job with
// KMS-encrypted output and volume, S3 checkpointing, and TensorBoard output.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Checkpoint and TensorBoard destinations, built up front to keep the
		// argument struct below readable.
		checkpoints := &sagemaker.TrainingJobCheckpointConfigArgs{
			LocalPath: pulumi.String("/opt/ml/checkpoints"),
			S3Uri:     pulumi.Sprintf("s3://%v/checkpoints/", exampleAwsS3Bucket.Bucket),
		}
		tensorBoard := &sagemaker.TrainingJobTensorBoardOutputConfigArgs{
			LocalPath:    pulumi.String("/opt/ml/output/tensorboard"),
			S3OutputPath: pulumi.Sprintf("s3://%v/tensorboard/", exampleAwsS3Bucket.Bucket),
		}

		jobArgs := &sagemaker.TrainingJobArgs{
			TrainingJobName: pulumi.String("example"),
			RoleArn:         pulumi.Any(exampleAwsIamRole.Arn),
			AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
				TrainingInputMode: pulumi.String("File"),
				TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
			},
			CheckpointConfig: checkpoints,
			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
				CompressionType: pulumi.String("GZIP"),
				KmsKeyId:        pulumi.Any(exampleAwsKmsKey.Arn),
				S3OutputPath:    pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
			},
			ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
				InstanceType:   pulumi.String("ml.m5.large"),
				InstanceCount:  pulumi.Int(1),
				VolumeSizeInGb: pulumi.Int(30),
				VolumeKmsKeyId: pulumi.Any(exampleAwsKmsKey.Arn),
			},
			StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
				MaxRuntimeInSeconds: pulumi.Int(3600),
			},
			TensorBoardOutputConfig: tensorBoard,
		}

		// The resource handle is unused; only the registration error matters.
		_, err := sagemaker.NewTrainingJob(ctx, "example", jobArgs)
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;

// Training job with KMS-encrypted output and volume, checkpointing to S3, and
// TensorBoard output.
return await Deployment.RunAsync(() =>
{
    var example = new Aws.Sagemaker.TrainingJob("example", new()
    {
        TrainingJobName = "example",
        RoleArn = exampleAwsIamRole.Arn,
        AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
        {
            TrainingInputMode = "File",
            TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
        },
        CheckpointConfig = new Aws.Sagemaker.Inputs.TrainingJobCheckpointConfigArgs
        {
            LocalPath = "/opt/ml/checkpoints",
            // Output.Format waits for Output values inside the interpolated
            // string; a bare $"..." would call ToString() on the Output itself.
            S3Uri = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/checkpoints/"),
        },
        OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
        {
            CompressionType = "GZIP",
            KmsKeyId = exampleAwsKmsKey.Arn,
            S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/output/"),
        },
        ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
        {
            InstanceType = "ml.m5.large",
            InstanceCount = 1,
            VolumeSizeInGb = 30,
            // Same KMS key as the output data config above.
            VolumeKmsKeyId = exampleAwsKmsKey.Arn,
        },
        StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
        {
            MaxRuntimeInSeconds = 3600,
        },
        TensorBoardOutputConfig = new Aws.Sagemaker.Inputs.TrainingJobTensorBoardOutputConfigArgs
        {
            LocalPath = "/opt/ml/output/tensorboard",
            S3OutputPath = Output.Format($"s3://{exampleAwsS3Bucket.Bucket}/tensorboard/"),
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.TrainingJob;
import com.pulumi.aws.sagemaker.TrainingJobArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobCheckpointConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobTensorBoardOutputConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program declaring a SageMaker training job with KMS-encrypted output
 * and volume, S3 checkpointing, and TensorBoard output.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the training job with its checkpoint, encryption, and
     * TensorBoard settings.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        var example = new TrainingJob("example", TrainingJobArgs.builder()
            .trainingJobName("example")
            .roleArn(exampleAwsIamRole.arn())
            .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                .trainingInputMode("File")
                .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
                .build())
            .checkpointConfig(TrainingJobCheckpointConfigArgs.builder()
                .localPath("/opt/ml/checkpoints")
                // Output.format waits for Output arguments to resolve;
                // String.format would embed the Output object's toString().
                .s3Uri(Output.format("s3://%s/checkpoints/", exampleAwsS3Bucket.bucket()))
                .build())
            .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                .compressionType("GZIP")
                .kmsKeyId(exampleAwsKmsKey.arn())
                .s3OutputPath(Output.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                .build())
            .resourceConfig(TrainingJobResourceConfigArgs.builder()
                .instanceType("ml.m5.large")
                .instanceCount(1)
                .volumeSizeInGb(30)
                // Same KMS key as the output data config above.
                .volumeKmsKeyId(exampleAwsKmsKey.arn())
                .build())
            .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                .maxRuntimeInSeconds(3600)
                .build())
            .tensorBoardOutputConfig(TrainingJobTensorBoardOutputConfigArgs.builder()
                .localPath("/opt/ml/output/tensorboard")
                .s3OutputPath(Output.format("s3://%s/tensorboard/", exampleAwsS3Bucket.bucket()))
                .build())
            .build());

    }
}

# Training job with KMS-encrypted output and volume, checkpointing to S3, and
# TensorBoard output.
resources:
  example:
    type: aws:sagemaker:TrainingJob
    properties:
      trainingJobName: example
      roleArn: ${exampleAwsIamRole.arn}
      algorithmSpecification:
        trainingInputMode: File
        trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
      checkpointConfig:
        localPath: /opt/ml/checkpoints
        s3Uri: s3://${exampleAwsS3Bucket.bucket}/checkpoints/
      outputDataConfig:
        compressionType: GZIP
        kmsKeyId: ${exampleAwsKmsKey.arn}
        s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
      resourceConfig:
        instanceType: ml.m5.large
        instanceCount: 1
        volumeSizeInGb: 30
        # Same KMS key as the output data config above.
        volumeKmsKeyId: ${exampleAwsKmsKey.arn}
      stoppingCondition:
        maxRuntimeInSeconds: 3600
      tensorBoardOutputConfig:
        localPath: /opt/ml/output/tensorboard
        s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/tensorboard/

With Managed Spot Training and Custom Metrics

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Container launch command plus the name/regex pairs registered as metric definitions.
const algorithmSpecification = {
    trainingInputMode: "File",
    trainingImage: trainingImage,
    containerEntrypoints: ["python", "/opt/ml/code/train.py"],
    containerArguments: ["--epochs", "10", "--batch-size", "128"],
    metricDefinitions: [
        { name: "train:loss", regex: "loss: ([0-9\\.]+)" },
        { name: "validation:accuracy", regex: "accuracy: ([0-9\\.]+)" },
    ],
};

// Single ml.m5.xlarge instance, 50 GB volume, 600 s keep-alive period.
const resourceConfig = {
    instanceType: "ml.m5.xlarge",
    instanceCount: 1,
    volumeSizeInGb: 50,
    keepAlivePeriodInSeconds: 600,
};

// Runtime cap and the (larger) wait-time cap.
const stoppingCondition = {
    maxRuntimeInSeconds: 3600,
    maxWaitTimeInSeconds: 7200,
};

// Training job with managed spot training, network isolation and
// inter-container traffic encryption all switched on.
const example = new aws.sagemaker.TrainingJob("example", {
    trainingJobName: "example",
    roleArn: exampleAwsIamRole.arn,
    enableManagedSpotTraining: true,
    enableNetworkIsolation: true,
    enableInterContainerTrafficEncryption: true,
    algorithmSpecification: algorithmSpecification,
    environment: {
        MODEL_DIR: "/opt/ml/model",
        SM_LOG_LEVEL: "20",
    },
    hyperParameters: {
        epochs: "10",
        batch_size: "128",
    },
    outputDataConfig: {
        s3OutputPath: `s3://${exampleAwsS3Bucket.bucket}/output/`,
    },
    resourceConfig: resourceConfig,
    retryStrategy: {
        maximumRetryAttempts: 3,
    },
    stoppingCondition: stoppingCondition,
    tags: {
        Environment: "test",
        Workload: "training",
    },
});

import pulumi
import pulumi_aws as aws

# Container launch command plus the name/regex pairs registered as metric definitions.
algorithm_specification = {
    "training_input_mode": "File",
    "training_image": training_image,
    "container_entrypoints": ["python", "/opt/ml/code/train.py"],
    "container_arguments": ["--epochs", "10", "--batch-size", "128"],
    "metric_definitions": [
        {"name": "train:loss", "regex": "loss: ([0-9\\.]+)"},
        {"name": "validation:accuracy", "regex": "accuracy: ([0-9\\.]+)"},
    ],
}

# Single ml.m5.xlarge instance, 50 GB volume, 600 s keep-alive period.
resource_config = {
    "instance_type": "ml.m5.xlarge",
    "instance_count": 1,
    "volume_size_in_gb": 50,
    "keep_alive_period_in_seconds": 600,
}

# Runtime cap and the (larger) wait-time cap.
stopping_condition = {
    "max_runtime_in_seconds": 3600,
    "max_wait_time_in_seconds": 7200,
}

# Training job with managed spot training, network isolation and
# inter-container traffic encryption all switched on.
example = aws.sagemaker.TrainingJob(
    "example",
    training_job_name="example",
    role_arn=example_aws_iam_role["arn"],
    enable_managed_spot_training=True,
    enable_network_isolation=True,
    enable_inter_container_traffic_encryption=True,
    algorithm_specification=algorithm_specification,
    environment={
        "MODEL_DIR": "/opt/ml/model",
        "SM_LOG_LEVEL": "20",
    },
    hyper_parameters={
        "epochs": "10",
        "batch_size": "128",
    },
    output_data_config={
        "s3_output_path": f"s3://{example_aws_s3_bucket['bucket']}/output/",
    },
    resource_config=resource_config,
    retry_strategy={
        "maximum_retry_attempts": 3,
    },
    stopping_condition=stopping_condition,
    tags={
        "Environment": "test",
        "Workload": "training",
    },
)

package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers one SageMaker training job with managed spot training,
// network isolation and inter-container traffic encryption switched on.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Container launch command plus the name/regex pairs registered as metric definitions.
		algorithmSpec := &sagemaker.TrainingJobAlgorithmSpecificationArgs{
			TrainingInputMode: pulumi.String("File"),
			TrainingImage:     pulumi.Any(trainingImage),
			ContainerEntrypoints: pulumi.StringArray{
				pulumi.String("python"),
				pulumi.String("/opt/ml/code/train.py"),
			},
			ContainerArguments: pulumi.StringArray{
				pulumi.String("--epochs"),
				pulumi.String("10"),
				pulumi.String("--batch-size"),
				pulumi.String("128"),
			},
			MetricDefinitions: sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArray{
				&sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArgs{
					Name:  pulumi.String("train:loss"),
					Regex: pulumi.String("loss: ([0-9\\.]+)"),
				},
				&sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArgs{
					Name:  pulumi.String("validation:accuracy"),
					Regex: pulumi.String("accuracy: ([0-9\\.]+)"),
				},
			},
		}

		// Single ml.m5.xlarge instance, 50 GB volume, 600 s keep-alive period.
		resourceCfg := &sagemaker.TrainingJobResourceConfigArgs{
			InstanceType:             pulumi.String("ml.m5.xlarge"),
			InstanceCount:            pulumi.Int(1),
			VolumeSizeInGb:           pulumi.Int(50),
			KeepAlivePeriodInSeconds: pulumi.Int(600),
		}

		// Runtime cap and the (larger) wait-time cap.
		stopping := &sagemaker.TrainingJobStoppingConditionArgs{
			MaxRuntimeInSeconds:  pulumi.Int(3600),
			MaxWaitTimeInSeconds: pulumi.Int(7200),
		}

		_, err := sagemaker.NewTrainingJob(ctx, "example", &sagemaker.TrainingJobArgs{
			TrainingJobName:                       pulumi.String("example"),
			RoleArn:                               pulumi.Any(exampleAwsIamRole.Arn),
			EnableManagedSpotTraining:             pulumi.Bool(true),
			EnableNetworkIsolation:                pulumi.Bool(true),
			EnableInterContainerTrafficEncryption: pulumi.Bool(true),
			AlgorithmSpecification:                algorithmSpec,
			Environment: pulumi.StringMap{
				"MODEL_DIR":    pulumi.String("/opt/ml/model"),
				"SM_LOG_LEVEL": pulumi.String("20"),
			},
			HyperParameters: pulumi.StringMap{
				"epochs":     pulumi.String("10"),
				"batch_size": pulumi.String("128"),
			},
			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
				S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
			},
			ResourceConfig: resourceCfg,
			RetryStrategy: &sagemaker.TrainingJobRetryStrategyArgs{
				MaximumRetryAttempts: pulumi.Int(3),
			},
			StoppingCondition: stopping,
			Tags: pulumi.StringMap{
				"Environment": pulumi.String("test"),
				"Workload":    pulumi.String("training"),
			},
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;

return await Deployment.RunAsync(() =>
{
    // Container launch command plus the name/regex pairs registered as metric definitions.
    var algorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
    {
        TrainingInputMode = "File",
        TrainingImage = trainingImage,
        ContainerEntrypoints = new[] { "python", "/opt/ml/code/train.py" },
        ContainerArguments = new[] { "--epochs", "10", "--batch-size", "128" },
        MetricDefinitions = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs
            {
                Name = "train:loss",
                Regex = "loss: ([0-9\\.]+)",
            },
            new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs
            {
                Name = "validation:accuracy",
                Regex = "accuracy: ([0-9\\.]+)",
            },
        },
    };

    // Single ml.m5.xlarge instance, 50 GB volume, 600 s keep-alive period.
    var resourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
    {
        InstanceType = "ml.m5.xlarge",
        InstanceCount = 1,
        VolumeSizeInGb = 50,
        KeepAlivePeriodInSeconds = 600,
    };

    // Runtime cap and the (larger) wait-time cap.
    var stoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
    {
        MaxRuntimeInSeconds = 3600,
        MaxWaitTimeInSeconds = 7200,
    };

    // Training job with managed spot training, network isolation and
    // inter-container traffic encryption all switched on.
    var example = new Aws.Sagemaker.TrainingJob("example", new()
    {
        TrainingJobName = "example",
        RoleArn = exampleAwsIamRole.Arn,
        EnableManagedSpotTraining = true,
        EnableNetworkIsolation = true,
        EnableInterContainerTrafficEncryption = true,
        AlgorithmSpecification = algorithmSpecification,
        Environment =
        {
            { "MODEL_DIR", "/opt/ml/model" },
            { "SM_LOG_LEVEL", "20" },
        },
        HyperParameters =
        {
            { "epochs", "10" },
            { "batch_size", "128" },
        },
        OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
        {
            S3OutputPath = $"s3://{exampleAwsS3Bucket.Bucket}/output/",
        },
        ResourceConfig = resourceConfig,
        RetryStrategy = new Aws.Sagemaker.Inputs.TrainingJobRetryStrategyArgs
        {
            MaximumRetryAttempts = 3,
        },
        StoppingCondition = stoppingCondition,
        Tags =
        {
            { "Environment", "test" },
            { "Workload", "training" },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.TrainingJob;
import com.pulumi.aws.sagemaker.TrainingJobArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
// Required by the metricDefinitions(...) builders below; without it the program does not compile.
import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobRetryStrategyArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example program: a SageMaker training job with managed spot training,
 * network isolation, inter-container traffic encryption and two custom
 * metric definitions.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the "example" training job.
     *
     * <p>{@code exampleAwsIamRole}, {@code exampleAwsS3Bucket} and
     * {@code trainingImage} are referenced but not declared in this snippet;
     * they must be supplied by the surrounding program.
     *
     * @param ctx Pulumi deployment context passed in by {@link Pulumi#run}
     */
    public static void stack(Context ctx) {
        var example = new TrainingJob("example", TrainingJobArgs.builder()
            .trainingJobName("example")
            .roleArn(exampleAwsIamRole.arn())
            .enableManagedSpotTraining(true)
            .enableNetworkIsolation(true)
            .enableInterContainerTrafficEncryption(true)
            .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
                .trainingInputMode("File")
                .trainingImage(trainingImage)
                .containerEntrypoints(
                    "python",
                    "/opt/ml/code/train.py")
                .containerArguments(
                    "--epochs",
                    "10",
                    "--batch-size",
                    "128")
                // Each definition pairs a metric name with the regex used to match it.
                .metricDefinitions(
                    TrainingJobAlgorithmSpecificationMetricDefinitionArgs.builder()
                        .name("train:loss")
                        .regex("loss: ([0-9\\.]+)")
                        .build(),
                    TrainingJobAlgorithmSpecificationMetricDefinitionArgs.builder()
                        .name("validation:accuracy")
                        .regex("accuracy: ([0-9\\.]+)")
                        .build())
                .build())
            .environment(Map.ofEntries(
                Map.entry("MODEL_DIR", "/opt/ml/model"),
                Map.entry("SM_LOG_LEVEL", "20")
            ))
            .hyperParameters(Map.ofEntries(
                Map.entry("epochs", "10"),
                Map.entry("batch_size", "128")
            ))
            .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                .s3OutputPath(String.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                .build())
            .resourceConfig(TrainingJobResourceConfigArgs.builder()
                .instanceType("ml.m5.xlarge")
                .instanceCount(1)
                .volumeSizeInGb(50)
                .keepAlivePeriodInSeconds(600)
                .build())
            .retryStrategy(TrainingJobRetryStrategyArgs.builder()
                .maximumRetryAttempts(3)
                .build())
            // Runtime cap (3600 s) and the larger wait-time cap (7200 s).
            .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                .maxRuntimeInSeconds(3600)
                .maxWaitTimeInSeconds(7200)
                .build())
            .tags(Map.ofEntries(
                Map.entry("Environment", "test"),
                Map.entry("Workload", "training")
            ))
            .build());

    }
}

# Pulumi YAML program declaring a single aws:sagemaker:TrainingJob named "example"
# with managed spot training, network isolation and inter-container traffic
# encryption enabled. ${exampleAwsIamRole}, ${trainingImage} and
# ${exampleAwsS3Bucket} are not declared in this snippet.
resources:
  example:
    type: aws:sagemaker:TrainingJob
    properties:
      trainingJobName: example
      roleArn: ${exampleAwsIamRole.arn}
      enableManagedSpotTraining: true
      enableNetworkIsolation: true
      enableInterContainerTrafficEncryption: true
      algorithmSpecification:
        trainingInputMode: File
        trainingImage: ${trainingImage}
        containerEntrypoints:
          - python
          - /opt/ml/code/train.py
        # Numeric arguments are quoted so they stay strings.
        containerArguments:
          - --epochs
          - '10'
          - --batch-size
          - '128'
        # Each definition pairs a metric name with a regex; the regexes are
        # single-quoted so the backslash is kept literally.
        metricDefinitions:
          - name: train:loss
            regex: 'loss: ([0-9\.]+)'
          - name: validation:accuracy
            regex: 'accuracy: ([0-9\.]+)'
      environment:
        MODEL_DIR: /opt/ml/model
        SM_LOG_LEVEL: '20'
      # Hyperparameter values are quoted strings, mirroring containerArguments above.
      hyperParameters:
        epochs: '10'
        batch_size: '128'
      outputDataConfig:
        s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
      # One ml.m5.xlarge instance, 50 GB volume, 600 s keep-alive period.
      resourceConfig:
        instanceType: ml.m5.xlarge
        instanceCount: 1
        volumeSizeInGb: 50
        keepAlivePeriodInSeconds: 600
      retryStrategy:
        maximumRetryAttempts: 3
      # Runtime cap (3600 s) and the larger wait-time cap (7200 s).
      stoppingCondition:
        maxRuntimeInSeconds: 3600
        maxWaitTimeInSeconds: 7200
      tags:
        Environment: test
        Workload: training

With Multiple Input Channels, Infrastructure Checks, and Session Tag Chaining

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// "train" channel: CSV objects under the bucket's train/ prefix.
const trainChannel = {
    channelName: "train",
    contentType: "text/csv",
    inputMode: "File",
    dataSource: {
        s3DataSource: {
            s3DataDistributionType: "FullyReplicated",
            s3DataType: "S3Prefix",
            s3Uri: `s3://${exampleAwsS3Bucket.bucket}/train/`,
        },
    },
};

// "validation" channel: same shape, reading from the validation/ prefix.
const validationChannel = {
    channelName: "validation",
    contentType: "text/csv",
    inputMode: "File",
    dataSource: {
        s3DataSource: {
            s3DataDistributionType: "FullyReplicated",
            s3DataType: "S3Prefix",
            s3Uri: `s3://${exampleAwsS3Bucket.bucket}/validation/`,
        },
    },
};

// Training job with two input channels, infrastructure checks and
// session tag chaining enabled.
const example = new aws.sagemaker.TrainingJob("example", {
    trainingJobName: "example",
    roleArn: exampleAwsIamRole.arn,
    algorithmSpecification: {
        trainingInputMode: "File",
        trainingImage: exampleAwsSagemakerPrebuiltEcrImage.registryPath,
    },
    inputDataConfigs: [trainChannel, validationChannel],
    infraCheckConfig: { enableInfraCheck: true },
    outputDataConfig: {
        s3OutputPath: `s3://${exampleAwsS3Bucket.bucket}/output/`,
    },
    resourceConfig: {
        instanceType: "ml.m5.large",
        instanceCount: 1,
        volumeSizeInGb: 30,
    },
    sessionChainingConfig: { enableSessionTagChaining: true },
    stoppingCondition: { maxRuntimeInSeconds: 3600 },
});

import pulumi
import pulumi_aws as aws

# "train" channel: CSV objects under the bucket's train/ prefix.
train_channel = {
    "channel_name": "train",
    "content_type": "text/csv",
    "input_mode": "File",
    "data_source": {
        "s3_data_source": {
            "s3_data_distribution_type": "FullyReplicated",
            "s3_data_type": "S3Prefix",
            "s3_uri": f"s3://{example_aws_s3_bucket['bucket']}/train/",
        },
    },
}

# "validation" channel: same shape, reading from the validation/ prefix.
validation_channel = {
    "channel_name": "validation",
    "content_type": "text/csv",
    "input_mode": "File",
    "data_source": {
        "s3_data_source": {
            "s3_data_distribution_type": "FullyReplicated",
            "s3_data_type": "S3Prefix",
            "s3_uri": f"s3://{example_aws_s3_bucket['bucket']}/validation/",
        },
    },
}

# Training job with two input channels, infrastructure checks and
# session tag chaining enabled.
example = aws.sagemaker.TrainingJob(
    "example",
    training_job_name="example",
    role_arn=example_aws_iam_role["arn"],
    algorithm_specification={
        "training_input_mode": "File",
        "training_image": example_aws_sagemaker_prebuilt_ecr_image["registryPath"],
    },
    input_data_configs=[train_channel, validation_channel],
    infra_check_config={"enable_infra_check": True},
    output_data_config={
        "s3_output_path": f"s3://{example_aws_s3_bucket['bucket']}/output/",
    },
    resource_config={
        "instance_type": "ml.m5.large",
        "instance_count": 1,
        "volume_size_in_gb": 30,
    },
    session_chaining_config={"enable_session_tag_chaining": True},
    stopping_condition={"max_runtime_in_seconds": 3600},
)

package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers one SageMaker training job that reads two S3 input channels
// and enables infrastructure checks and session tag chaining.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// "train" channel: CSV objects under the bucket's train/ prefix.
		trainChannel := &sagemaker.TrainingJobInputDataConfigArgs{
			ChannelName: pulumi.String("train"),
			ContentType: pulumi.String("text/csv"),
			InputMode:   pulumi.String("File"),
			DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceArgs{
				S3DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceArgs{
					S3DataDistributionType: pulumi.String("FullyReplicated"),
					S3DataType:             pulumi.String("S3Prefix"),
					S3Uri:                  pulumi.Sprintf("s3://%v/train/", exampleAwsS3Bucket.Bucket),
				},
			},
		}

		// "validation" channel: same shape, reading from the validation/ prefix.
		validationChannel := &sagemaker.TrainingJobInputDataConfigArgs{
			ChannelName: pulumi.String("validation"),
			ContentType: pulumi.String("text/csv"),
			InputMode:   pulumi.String("File"),
			DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceArgs{
				S3DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceArgs{
					S3DataDistributionType: pulumi.String("FullyReplicated"),
					S3DataType:             pulumi.String("S3Prefix"),
					S3Uri:                  pulumi.Sprintf("s3://%v/validation/", exampleAwsS3Bucket.Bucket),
				},
			},
		}

		_, err := sagemaker.NewTrainingJob(ctx, "example", &sagemaker.TrainingJobArgs{
			TrainingJobName: pulumi.String("example"),
			RoleArn:         pulumi.Any(exampleAwsIamRole.Arn),
			AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
				TrainingInputMode: pulumi.String("File"),
				TrainingImage:     pulumi.Any(exampleAwsSagemakerPrebuiltEcrImage.RegistryPath),
			},
			InputDataConfigs: sagemaker.TrainingJobInputDataConfigArray{
				trainChannel,
				validationChannel,
			},
			InfraCheckConfig: &sagemaker.TrainingJobInfraCheckConfigArgs{
				EnableInfraCheck: pulumi.Bool(true),
			},
			OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
				S3OutputPath: pulumi.Sprintf("s3://%v/output/", exampleAwsS3Bucket.Bucket),
			},
			ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
				InstanceType:   pulumi.String("ml.m5.large"),
				InstanceCount:  pulumi.Int(1),
				VolumeSizeInGb: pulumi.Int(30),
			},
			SessionChainingConfig: &sagemaker.TrainingJobSessionChainingConfigArgs{
				EnableSessionTagChaining: pulumi.Bool(true),
			},
			StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
				MaxRuntimeInSeconds: pulumi.Int(3600),
			},
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;

return await Deployment.RunAsync(() =>
{
    // "train" channel: CSV objects under the bucket's train/ prefix.
    var trainChannel = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
    {
        ChannelName = "train",
        ContentType = "text/csv",
        InputMode = "File",
        DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
        {
            S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
            {
                S3DataDistributionType = "FullyReplicated",
                S3DataType = "S3Prefix",
                S3Uri = $"s3://{exampleAwsS3Bucket.Bucket}/train/",
            },
        },
    };

    // "validation" channel: same shape, reading from the validation/ prefix.
    var validationChannel = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
    {
        ChannelName = "validation",
        ContentType = "text/csv",
        InputMode = "File",
        DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
        {
            S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
            {
                S3DataDistributionType = "FullyReplicated",
                S3DataType = "S3Prefix",
                S3Uri = $"s3://{exampleAwsS3Bucket.Bucket}/validation/",
            },
        },
    };

    // Training job with two input channels, infrastructure checks and
    // session tag chaining enabled.
    var example = new Aws.Sagemaker.TrainingJob("example", new()
    {
        TrainingJobName = "example",
        RoleArn = exampleAwsIamRole.Arn,
        AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
        {
            TrainingInputMode = "File",
            TrainingImage = exampleAwsSagemakerPrebuiltEcrImage.RegistryPath,
        },
        InputDataConfigs = new[]
        {
            trainChannel,
            validationChannel,
        },
        InfraCheckConfig = new Aws.Sagemaker.Inputs.TrainingJobInfraCheckConfigArgs
        {
            EnableInfraCheck = true,
        },
        OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
        {
            S3OutputPath = $"s3://{exampleAwsS3Bucket.Bucket}/output/",
        },
        ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
        {
            InstanceType = "ml.m5.large",
            InstanceCount = 1,
            VolumeSizeInGb = 30,
        },
        SessionChainingConfig = new Aws.Sagemaker.Inputs.TrainingJobSessionChainingConfigArgs
        {
            EnableSessionTagChaining = true,
        },
        StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
        {
            MaxRuntimeInSeconds = 3600,
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.TrainingJob;
import com.pulumi.aws.sagemaker.TrainingJobArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobAlgorithmSpecificationArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobInfraCheckConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobOutputDataConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobResourceConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobSessionChainingConfigArgs;
import com.pulumi.aws.sagemaker.inputs.TrainingJobStoppingConditionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example program: a SageMaker training job that reads two S3 input channels
 * and enables infrastructure checks and session tag chaining.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the "example" training job. The IAM role, prebuilt ECR image and
     * S3 bucket it references are not declared in this snippet.
     *
     * @param ctx Pulumi deployment context passed in by {@code Pulumi.run}
     */
    public static void stack(Context ctx) {
        // "train" channel: CSV objects under the bucket's train/ prefix.
        var trainChannel = TrainingJobInputDataConfigArgs.builder()
            .channelName("train")
            .contentType("text/csv")
            .inputMode("File")
            .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
                .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                    .s3DataDistributionType("FullyReplicated")
                    .s3DataType("S3Prefix")
                    .s3Uri(String.format("s3://%s/train/", exampleAwsS3Bucket.bucket()))
                    .build())
                .build())
            .build();

        // "validation" channel: same shape, reading from the validation/ prefix.
        var validationChannel = TrainingJobInputDataConfigArgs.builder()
            .channelName("validation")
            .contentType("text/csv")
            .inputMode("File")
            .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
                .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                    .s3DataDistributionType("FullyReplicated")
                    .s3DataType("S3Prefix")
                    .s3Uri(String.format("s3://%s/validation/", exampleAwsS3Bucket.bucket()))
                    .build())
                .build())
            .build();

        var algorithmSpecification = TrainingJobAlgorithmSpecificationArgs.builder()
            .trainingInputMode("File")
            .trainingImage(exampleAwsSagemakerPrebuiltEcrImage.registryPath())
            .build();

        var resourceConfig = TrainingJobResourceConfigArgs.builder()
            .instanceType("ml.m5.large")
            .instanceCount(1)
            .volumeSizeInGb(30)
            .build();

        var example = new TrainingJob("example", TrainingJobArgs.builder()
            .trainingJobName("example")
            .roleArn(exampleAwsIamRole.arn())
            .algorithmSpecification(algorithmSpecification)
            .inputDataConfigs(trainChannel, validationChannel)
            .infraCheckConfig(TrainingJobInfraCheckConfigArgs.builder()
                .enableInfraCheck(true)
                .build())
            .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
                .s3OutputPath(String.format("s3://%s/output/", exampleAwsS3Bucket.bucket()))
                .build())
            .resourceConfig(resourceConfig)
            .sessionChainingConfig(TrainingJobSessionChainingConfigArgs.builder()
                .enableSessionTagChaining(true)
                .build())
            .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
                .maxRuntimeInSeconds(3600)
                .build())
            .build());

    }
}

# Pulumi YAML program declaring a single aws:sagemaker:TrainingJob named "example"
# with two S3 input channels, infrastructure checks and session tag chaining.
# The IAM role, prebuilt ECR image and S3 bucket referenced via ${...} are not
# declared in this snippet.
resources:
  example:
    type: aws:sagemaker:TrainingJob
    properties:
      trainingJobName: example
      roleArn: ${exampleAwsIamRole.arn}
      algorithmSpecification:
        trainingInputMode: File
        trainingImage: ${exampleAwsSagemakerPrebuiltEcrImage.registryPath}
      # The two channels differ only in channelName and the S3 prefix they read.
      inputDataConfigs:
        - channelName: train
          contentType: text/csv
          inputMode: File
          dataSource:
            s3DataSource:
              s3DataDistributionType: FullyReplicated
              s3DataType: S3Prefix
              s3Uri: s3://${exampleAwsS3Bucket.bucket}/train/
        - channelName: validation
          contentType: text/csv
          inputMode: File
          dataSource:
            s3DataSource:
              s3DataDistributionType: FullyReplicated
              s3DataType: S3Prefix
              s3Uri: s3://${exampleAwsS3Bucket.bucket}/validation/
      infraCheckConfig:
        enableInfraCheck: true
      outputDataConfig:
        s3OutputPath: s3://${exampleAwsS3Bucket.bucket}/output/
      # One ml.m5.large instance with a 30 GB volume.
      resourceConfig:
        instanceType: ml.m5.large
        instanceCount: 1
        volumeSizeInGb: 30
      sessionChainingConfig:
        enableSessionTagChaining: true
      stoppingCondition:
        maxRuntimeInSeconds: 3600

Create TrainingJob Resource

Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

Constructor syntax

new TrainingJob(name: string, args: TrainingJobArgs, opts?: CustomResourceOptions);

@overload
def TrainingJob(resource_name: str,
                args: TrainingJobArgs,
                opts: Optional[ResourceOptions] = None)

@overload
def TrainingJob(resource_name: str,
                opts: Optional[ResourceOptions] = None,
                role_arn: Optional[str] = None,
                training_job_name: Optional[str] = None,
                output_data_config: Optional[TrainingJobOutputDataConfigArgs] = None,
                retry_strategy: Optional[TrainingJobRetryStrategyArgs] = None,
                delete_model_packages_on_destroy: Optional[bool] = None,
                delete_vpc_enis_on_destroy: Optional[bool] = None,
                enable_inter_container_traffic_encryption: Optional[bool] = None,
                enable_managed_spot_training: Optional[bool] = None,
                enable_network_isolation: Optional[bool] = None,
                environment: Optional[Mapping[str, str]] = None,
                experiment_config: Optional[TrainingJobExperimentConfigArgs] = None,
                hyper_parameters: Optional[Mapping[str, str]] = None,
                infra_check_config: Optional[TrainingJobInfraCheckConfigArgs] = None,
                input_data_configs: Optional[Sequence[TrainingJobInputDataConfigArgs]] = None,
                mlflow_config: Optional[TrainingJobMlflowConfigArgs] = None,
                model_package_config: Optional[TrainingJobModelPackageConfigArgs] = None,
                debug_rule_configurations: Optional[Sequence[TrainingJobDebugRuleConfigurationArgs]] = None,
                algorithm_specification: Optional[TrainingJobAlgorithmSpecificationArgs] = None,
                serverless_job_config: Optional[TrainingJobServerlessJobConfigArgs] = None,
                region: Optional[str] = None,
                remote_debug_config: Optional[TrainingJobRemoteDebugConfigArgs] = None,
                resource_config: Optional[TrainingJobResourceConfigArgs] = None,
                profiler_config: Optional[TrainingJobProfilerConfigArgs] = None,
                debug_hook_config: Optional[TrainingJobDebugHookConfigArgs] = None,
                profiler_rule_configurations: Optional[Sequence[TrainingJobProfilerRuleConfigurationArgs]] = None,
                session_chaining_config: Optional[TrainingJobSessionChainingConfigArgs] = None,
                stopping_condition: Optional[TrainingJobStoppingConditionArgs] = None,
                tags: Optional[Mapping[str, str]] = None,
                tensor_board_output_config: Optional[TrainingJobTensorBoardOutputConfigArgs] = None,
                timeouts: Optional[TrainingJobTimeoutsArgs] = None,
                checkpoint_config: Optional[TrainingJobCheckpointConfigArgs] = None,
                vpc_config: Optional[TrainingJobVpcConfigArgs] = None)

func NewTrainingJob(ctx *Context, name string, args *TrainingJobArgs, opts ...ResourceOption) (*TrainingJob, error)

public TrainingJob(string name, TrainingJobArgs args, CustomResourceOptions? opts = null)

public TrainingJob(String name, TrainingJobArgs args)
public TrainingJob(String name, TrainingJobArgs args, CustomResourceOptions options)

type: aws:sagemaker:TrainingJob
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.

Parameters

name string: The unique name of the resource.
args TrainingJobArgs: The arguments to resource properties.
opts CustomResourceOptions: Bag of options to control resource's behavior.

resource_name str: The unique name of the resource.
args TrainingJobArgs: The arguments to resource properties.
opts ResourceOptions: Bag of options to control resource's behavior.

ctx Context: Context object for the current deployment.
name string: The unique name of the resource.
args TrainingJobArgs: The arguments to resource properties.
opts ResourceOption: Bag of options to control resource's behavior.

name string: The unique name of the resource.
args TrainingJobArgs: The arguments to resource properties.
opts CustomResourceOptions: Bag of options to control resource's behavior.

name String: The unique name of the resource.
args TrainingJobArgs: The arguments to resource properties.
options CustomResourceOptions: Bag of options to control resource's behavior.

Constructor example

The following reference example uses placeholder values for all input properties.

// C# reference example: constructs a TrainingJob with every input property set to a
// placeholder ("string", 0, false). The values are not usable as-is; replace each one
// and drop the properties you do not need.
var trainingJobResource = new Aws.Sagemaker.TrainingJob("trainingJobResource", new()
{
    RoleArn = "string",
    TrainingJobName = "string",
    OutputDataConfig = new Aws.Sagemaker.Inputs.TrainingJobOutputDataConfigArgs
    {
        S3OutputPath = "string",
        CompressionType = "string",
        KmsKeyId = "string",
    },
    RetryStrategy = new Aws.Sagemaker.Inputs.TrainingJobRetryStrategyArgs
    {
        MaximumRetryAttempts = 0,
    },
    DeleteModelPackagesOnDestroy = false,
    DeleteVpcEnisOnDestroy = false,
    EnableInterContainerTrafficEncryption = false,
    EnableManagedSpotTraining = false,
    EnableNetworkIsolation = false,
    // Map-valued input; a single placeholder entry is shown.
    Environment = 
    {
        { "string", "string" },
    },
    ExperimentConfig = new Aws.Sagemaker.Inputs.TrainingJobExperimentConfigArgs
    {
        ExperimentName = "string",
        RunName = "string",
        TrialComponentDisplayName = "string",
        TrialName = "string",
    },
    HyperParameters = 
    {
        { "string", "string" },
    },
    InfraCheckConfig = new Aws.Sagemaker.Inputs.TrainingJobInfraCheckConfigArgs
    {
        EnableInfraCheck = false,
    },
    // List-valued input; a single placeholder element is shown.
    InputDataConfigs = new[]
    {
        new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigArgs
        {
            ChannelName = "string",
            CompressionType = "string",
            ContentType = "string",
            DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceArgs
            {
                // NOTE(review): FileSystemDataSource and S3DataSource are both populated because
                // this sample enumerates every property; whether a real channel may set both
                // is not shown here — confirm before copying.
                FileSystemDataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs
                {
                    DirectoryPath = "string",
                    FileSystemAccessMode = "string",
                    FileSystemId = "string",
                    FileSystemType = "string",
                },
                S3DataSource = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceArgs
                {
                    S3DataType = "string",
                    S3Uri = "string",
                    AttributeNames = new[]
                    {
                        "string",
                    },
                    HubAccessConfig = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs
                    {
                        HubContentArn = "string",
                    },
                    InstanceGroupNames = new[]
                    {
                        "string",
                    },
                    ModelAccessConfig = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs
                    {
                        AcceptEula = false,
                    },
                    S3DataDistributionType = "string",
                },
            },
            InputMode = "string",
            RecordWrapperType = "string",
            ShuffleConfig = new Aws.Sagemaker.Inputs.TrainingJobInputDataConfigShuffleConfigArgs
            {
                Seed = 0,
            },
        },
    },
    MlflowConfig = new Aws.Sagemaker.Inputs.TrainingJobMlflowConfigArgs
    {
        MlflowResourceArn = "string",
        MlflowExperimentName = "string",
        MlflowRunName = "string",
    },
    ModelPackageConfig = new Aws.Sagemaker.Inputs.TrainingJobModelPackageConfigArgs
    {
        ModelPackageGroupArn = "string",
        SourceModelPackageArn = "string",
    },
    DebugRuleConfigurations = new[]
    {
        new Aws.Sagemaker.Inputs.TrainingJobDebugRuleConfigurationArgs
        {
            RuleConfigurationName = "string",
            RuleEvaluatorImage = "string",
            InstanceType = "string",
            LocalPath = "string",
            RuleParameters = 
            {
                { "string", "string" },
            },
            S3OutputPath = "string",
            VolumeSizeInGb = 0,
        },
    },
    AlgorithmSpecification = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationArgs
    {
        AlgorithmName = "string",
        ContainerArguments = new[]
        {
            "string",
        },
        ContainerEntrypoints = new[]
        {
            "string",
        },
        EnableSagemakerMetricsTimeSeries = false,
        MetricDefinitions = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationMetricDefinitionArgs
            {
                Name = "string",
                Regex = "string",
            },
        },
        TrainingImage = "string",
        TrainingImageConfig = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationTrainingImageConfigArgs
        {
            TrainingRepositoryAccessMode = "string",
            TrainingRepositoryAuthConfig = new Aws.Sagemaker.Inputs.TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs
            {
                TrainingRepositoryCredentialsProviderArn = "string",
            },
        },
        TrainingInputMode = "string",
    },
    ServerlessJobConfig = new Aws.Sagemaker.Inputs.TrainingJobServerlessJobConfigArgs
    {
        BaseModelArn = "string",
        JobType = "string",
        AcceptEula = false,
        CustomizationTechnique = "string",
        EvaluationType = "string",
        EvaluatorArn = "string",
        Peft = "string",
    },
    Region = "string",
    RemoteDebugConfig = new Aws.Sagemaker.Inputs.TrainingJobRemoteDebugConfigArgs
    {
        EnableRemoteDebug = false,
    },
    ResourceConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigArgs
    {
        InstanceCount = 0,
        InstanceGroups = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobResourceConfigInstanceGroupArgs
            {
                InstanceCount = 0,
                InstanceGroupName = "string",
                InstanceType = "string",
            },
        },
        InstancePlacementConfig = new Aws.Sagemaker.Inputs.TrainingJobResourceConfigInstancePlacementConfigArgs
        {
            EnableMultipleJobs = false,
            PlacementSpecifications = new[]
            {
                new Aws.Sagemaker.Inputs.TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs
                {
                    InstanceCount = 0,
                    UltraServerId = "string",
                },
            },
        },
        InstanceType = "string",
        KeepAlivePeriodInSeconds = 0,
        TrainingPlanArn = "string",
        VolumeKmsKeyId = "string",
        VolumeSizeInGb = 0,
    },
    ProfilerConfig = new Aws.Sagemaker.Inputs.TrainingJobProfilerConfigArgs
    {
        DisableProfiler = false,
        ProfilingIntervalInMilliseconds = 0,
        ProfilingParameters = 
        {
            { "string", "string" },
        },
        S3OutputPath = "string",
    },
    DebugHookConfig = new Aws.Sagemaker.Inputs.TrainingJobDebugHookConfigArgs
    {
        S3OutputPath = "string",
        CollectionConfigurations = new[]
        {
            new Aws.Sagemaker.Inputs.TrainingJobDebugHookConfigCollectionConfigurationArgs
            {
                CollectionName = "string",
                CollectionParameters = 
                {
                    { "string", "string" },
                },
            },
        },
        HookParameters = 
        {
            { "string", "string" },
        },
        LocalPath = "string",
    },
    ProfilerRuleConfigurations = new[]
    {
        new Aws.Sagemaker.Inputs.TrainingJobProfilerRuleConfigurationArgs
        {
            RuleConfigurationName = "string",
            RuleEvaluatorImage = "string",
            InstanceType = "string",
            LocalPath = "string",
            RuleParameters = 
            {
                { "string", "string" },
            },
            S3OutputPath = "string",
            VolumeSizeInGb = 0,
        },
    },
    SessionChainingConfig = new Aws.Sagemaker.Inputs.TrainingJobSessionChainingConfigArgs
    {
        EnableSessionTagChaining = false,
    },
    StoppingCondition = new Aws.Sagemaker.Inputs.TrainingJobStoppingConditionArgs
    {
        MaxPendingTimeInSeconds = 0,
        MaxRuntimeInSeconds = 0,
        MaxWaitTimeInSeconds = 0,
    },
    Tags = 
    {
        { "string", "string" },
    },
    TensorBoardOutputConfig = new Aws.Sagemaker.Inputs.TrainingJobTensorBoardOutputConfigArgs
    {
        S3OutputPath = "string",
        LocalPath = "string",
    },
    // Placeholder strings; the accepted duration format is not shown in this sample.
    Timeouts = new Aws.Sagemaker.Inputs.TrainingJobTimeoutsArgs
    {
        Create = "string",
        Delete = "string",
        Update = "string",
    },
    CheckpointConfig = new Aws.Sagemaker.Inputs.TrainingJobCheckpointConfigArgs
    {
        S3Uri = "string",
        LocalPath = "string",
    },
    VpcConfig = new Aws.Sagemaker.Inputs.TrainingJobVpcConfigArgs
    {
        SecurityGroupIds = new[]
        {
            "string",
        },
        Subnets = new[]
        {
            "string",
        },
    },
});

// Go reference example: calls NewTrainingJob with every input property set to a
// placeholder ("string", 0, false). This is a fragment: ctx is supplied by the
// enclosing program (the page's basic example obtains it from a pulumi.Run callback),
// and the returned err is not checked here, so the surrounding code must handle it.
example, err := sagemaker.NewTrainingJob(ctx, "trainingJobResource", &sagemaker.TrainingJobArgs{
	RoleArn:         pulumi.String("string"),
	TrainingJobName: pulumi.String("string"),
	OutputDataConfig: &sagemaker.TrainingJobOutputDataConfigArgs{
		S3OutputPath:    pulumi.String("string"),
		CompressionType: pulumi.String("string"),
		KmsKeyId:        pulumi.String("string"),
	},
	RetryStrategy: &sagemaker.TrainingJobRetryStrategyArgs{
		MaximumRetryAttempts: pulumi.Int(0),
	},
	DeleteModelPackagesOnDestroy:          pulumi.Bool(false),
	DeleteVpcEnisOnDestroy:                pulumi.Bool(false),
	EnableInterContainerTrafficEncryption: pulumi.Bool(false),
	EnableManagedSpotTraining:             pulumi.Bool(false),
	EnableNetworkIsolation:                pulumi.Bool(false),
	// Map-valued input; a single placeholder entry is shown.
	Environment: pulumi.StringMap{
		"string": pulumi.String("string"),
	},
	ExperimentConfig: &sagemaker.TrainingJobExperimentConfigArgs{
		ExperimentName:            pulumi.String("string"),
		RunName:                   pulumi.String("string"),
		TrialComponentDisplayName: pulumi.String("string"),
		TrialName:                 pulumi.String("string"),
	},
	HyperParameters: pulumi.StringMap{
		"string": pulumi.String("string"),
	},
	InfraCheckConfig: &sagemaker.TrainingJobInfraCheckConfigArgs{
		EnableInfraCheck: pulumi.Bool(false),
	},
	// List-valued input; a single placeholder element is shown.
	InputDataConfigs: sagemaker.TrainingJobInputDataConfigArray{
		&sagemaker.TrainingJobInputDataConfigArgs{
			ChannelName:     pulumi.String("string"),
			CompressionType: pulumi.String("string"),
			ContentType:     pulumi.String("string"),
			DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceArgs{
				// NOTE(review): FileSystemDataSource and S3DataSource are both populated because
				// this sample enumerates every property; whether a real channel may set both
				// is not shown here — confirm before copying.
				FileSystemDataSource: &sagemaker.TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs{
					DirectoryPath:        pulumi.String("string"),
					FileSystemAccessMode: pulumi.String("string"),
					FileSystemId:         pulumi.String("string"),
					FileSystemType:       pulumi.String("string"),
				},
				S3DataSource: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceArgs{
					S3DataType: pulumi.String("string"),
					S3Uri:      pulumi.String("string"),
					AttributeNames: pulumi.StringArray{
						pulumi.String("string"),
					},
					HubAccessConfig: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs{
						HubContentArn: pulumi.String("string"),
					},
					InstanceGroupNames: pulumi.StringArray{
						pulumi.String("string"),
					},
					ModelAccessConfig: &sagemaker.TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs{
						AcceptEula: pulumi.Bool(false),
					},
					S3DataDistributionType: pulumi.String("string"),
				},
			},
			InputMode:         pulumi.String("string"),
			RecordWrapperType: pulumi.String("string"),
			ShuffleConfig: &sagemaker.TrainingJobInputDataConfigShuffleConfigArgs{
				Seed: pulumi.Int(0),
			},
		},
	},
	MlflowConfig: &sagemaker.TrainingJobMlflowConfigArgs{
		MlflowResourceArn:    pulumi.String("string"),
		MlflowExperimentName: pulumi.String("string"),
		MlflowRunName:        pulumi.String("string"),
	},
	ModelPackageConfig: &sagemaker.TrainingJobModelPackageConfigArgs{
		ModelPackageGroupArn:  pulumi.String("string"),
		SourceModelPackageArn: pulumi.String("string"),
	},
	DebugRuleConfigurations: sagemaker.TrainingJobDebugRuleConfigurationArray{
		&sagemaker.TrainingJobDebugRuleConfigurationArgs{
			RuleConfigurationName: pulumi.String("string"),
			RuleEvaluatorImage:    pulumi.String("string"),
			InstanceType:          pulumi.String("string"),
			LocalPath:             pulumi.String("string"),
			RuleParameters: pulumi.StringMap{
				"string": pulumi.String("string"),
			},
			S3OutputPath:   pulumi.String("string"),
			VolumeSizeInGb: pulumi.Int(0),
		},
	},
	AlgorithmSpecification: &sagemaker.TrainingJobAlgorithmSpecificationArgs{
		AlgorithmName: pulumi.String("string"),
		ContainerArguments: pulumi.StringArray{
			pulumi.String("string"),
		},
		ContainerEntrypoints: pulumi.StringArray{
			pulumi.String("string"),
		},
		EnableSagemakerMetricsTimeSeries: pulumi.Bool(false),
		MetricDefinitions: sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArray{
			&sagemaker.TrainingJobAlgorithmSpecificationMetricDefinitionArgs{
				Name:  pulumi.String("string"),
				Regex: pulumi.String("string"),
			},
		},
		TrainingImage: pulumi.String("string"),
		TrainingImageConfig: &sagemaker.TrainingJobAlgorithmSpecificationTrainingImageConfigArgs{
			TrainingRepositoryAccessMode: pulumi.String("string"),
			TrainingRepositoryAuthConfig: &sagemaker.TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs{
				TrainingRepositoryCredentialsProviderArn: pulumi.String("string"),
			},
		},
		TrainingInputMode: pulumi.String("string"),
	},
	ServerlessJobConfig: &sagemaker.TrainingJobServerlessJobConfigArgs{
		BaseModelArn:           pulumi.String("string"),
		JobType:                pulumi.String("string"),
		AcceptEula:             pulumi.Bool(false),
		CustomizationTechnique: pulumi.String("string"),
		EvaluationType:         pulumi.String("string"),
		EvaluatorArn:           pulumi.String("string"),
		Peft:                   pulumi.String("string"),
	},
	Region: pulumi.String("string"),
	RemoteDebugConfig: &sagemaker.TrainingJobRemoteDebugConfigArgs{
		EnableRemoteDebug: pulumi.Bool(false),
	},
	ResourceConfig: &sagemaker.TrainingJobResourceConfigArgs{
		InstanceCount: pulumi.Int(0),
		InstanceGroups: sagemaker.TrainingJobResourceConfigInstanceGroupArray{
			&sagemaker.TrainingJobResourceConfigInstanceGroupArgs{
				InstanceCount:     pulumi.Int(0),
				InstanceGroupName: pulumi.String("string"),
				InstanceType:      pulumi.String("string"),
			},
		},
		InstancePlacementConfig: &sagemaker.TrainingJobResourceConfigInstancePlacementConfigArgs{
			EnableMultipleJobs: pulumi.Bool(false),
			PlacementSpecifications: sagemaker.TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArray{
				&sagemaker.TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs{
					InstanceCount: pulumi.Int(0),
					UltraServerId: pulumi.String("string"),
				},
			},
		},
		InstanceType:             pulumi.String("string"),
		KeepAlivePeriodInSeconds: pulumi.Int(0),
		TrainingPlanArn:          pulumi.String("string"),
		VolumeKmsKeyId:           pulumi.String("string"),
		VolumeSizeInGb:           pulumi.Int(0),
	},
	ProfilerConfig: &sagemaker.TrainingJobProfilerConfigArgs{
		DisableProfiler:                 pulumi.Bool(false),
		ProfilingIntervalInMilliseconds: pulumi.Int(0),
		ProfilingParameters: pulumi.StringMap{
			"string": pulumi.String("string"),
		},
		S3OutputPath: pulumi.String("string"),
	},
	DebugHookConfig: &sagemaker.TrainingJobDebugHookConfigArgs{
		S3OutputPath: pulumi.String("string"),
		CollectionConfigurations: sagemaker.TrainingJobDebugHookConfigCollectionConfigurationArray{
			&sagemaker.TrainingJobDebugHookConfigCollectionConfigurationArgs{
				CollectionName: pulumi.String("string"),
				CollectionParameters: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
		},
		HookParameters: pulumi.StringMap{
			"string": pulumi.String("string"),
		},
		LocalPath: pulumi.String("string"),
	},
	ProfilerRuleConfigurations: sagemaker.TrainingJobProfilerRuleConfigurationArray{
		&sagemaker.TrainingJobProfilerRuleConfigurationArgs{
			RuleConfigurationName: pulumi.String("string"),
			RuleEvaluatorImage:    pulumi.String("string"),
			InstanceType:          pulumi.String("string"),
			LocalPath:             pulumi.String("string"),
			RuleParameters: pulumi.StringMap{
				"string": pulumi.String("string"),
			},
			S3OutputPath:   pulumi.String("string"),
			VolumeSizeInGb: pulumi.Int(0),
		},
	},
	SessionChainingConfig: &sagemaker.TrainingJobSessionChainingConfigArgs{
		EnableSessionTagChaining: pulumi.Bool(false),
	},
	StoppingCondition: &sagemaker.TrainingJobStoppingConditionArgs{
		MaxPendingTimeInSeconds: pulumi.Int(0),
		MaxRuntimeInSeconds:     pulumi.Int(0),
		MaxWaitTimeInSeconds:    pulumi.Int(0),
	},
	Tags: pulumi.StringMap{
		"string": pulumi.String("string"),
	},
	TensorBoardOutputConfig: &sagemaker.TrainingJobTensorBoardOutputConfigArgs{
		S3OutputPath: pulumi.String("string"),
		LocalPath:    pulumi.String("string"),
	},
	// Placeholder strings; the accepted duration format is not shown in this sample.
	Timeouts: &sagemaker.TrainingJobTimeoutsArgs{
		Create: pulumi.String("string"),
		Delete: pulumi.String("string"),
		Update: pulumi.String("string"),
	},
	CheckpointConfig: &sagemaker.TrainingJobCheckpointConfigArgs{
		S3Uri:     pulumi.String("string"),
		LocalPath: pulumi.String("string"),
	},
	VpcConfig: &sagemaker.TrainingJobVpcConfigArgs{
		SecurityGroupIds: pulumi.StringArray{
			pulumi.String("string"),
		},
		Subnets: pulumi.StringArray{
			pulumi.String("string"),
		},
	},
})

// Java reference example: builds a TrainingJob with every input property set to a
// placeholder ("string", 0, false). Each nested input is assembled with its own
// Args builder and closed with build(). Replace the values and drop what you do not need.
var trainingJobResource = new TrainingJob("trainingJobResource", TrainingJobArgs.builder()
    .roleArn("string")
    .trainingJobName("string")
    .outputDataConfig(TrainingJobOutputDataConfigArgs.builder()
        .s3OutputPath("string")
        .compressionType("string")
        .kmsKeyId("string")
        .build())
    .retryStrategy(TrainingJobRetryStrategyArgs.builder()
        .maximumRetryAttempts(0)
        .build())
    .deleteModelPackagesOnDestroy(false)
    .deleteVpcEnisOnDestroy(false)
    .enableInterContainerTrafficEncryption(false)
    .enableManagedSpotTraining(false)
    .enableNetworkIsolation(false)
    // Map-valued input; a single placeholder entry is shown.
    .environment(Map.of("string", "string"))
    .experimentConfig(TrainingJobExperimentConfigArgs.builder()
        .experimentName("string")
        .runName("string")
        .trialComponentDisplayName("string")
        .trialName("string")
        .build())
    .hyperParameters(Map.of("string", "string"))
    .infraCheckConfig(TrainingJobInfraCheckConfigArgs.builder()
        .enableInfraCheck(false)
        .build())
    // The other language examples on this page write this input as a list; here a single
    // placeholder element is passed directly to the builder method.
    .inputDataConfigs(TrainingJobInputDataConfigArgs.builder()
        .channelName("string")
        .compressionType("string")
        .contentType("string")
        .dataSource(TrainingJobInputDataConfigDataSourceArgs.builder()
            // NOTE(review): fileSystemDataSource and s3DataSource are both populated because
            // this sample enumerates every property; whether a real channel may set both
            // is not shown here — confirm before copying.
            .fileSystemDataSource(TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs.builder()
                .directoryPath("string")
                .fileSystemAccessMode("string")
                .fileSystemId("string")
                .fileSystemType("string")
                .build())
            .s3DataSource(TrainingJobInputDataConfigDataSourceS3DataSourceArgs.builder()
                .s3DataType("string")
                .s3Uri("string")
                .attributeNames("string")
                .hubAccessConfig(TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs.builder()
                    .hubContentArn("string")
                    .build())
                .instanceGroupNames("string")
                .modelAccessConfig(TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs.builder()
                    .acceptEula(false)
                    .build())
                .s3DataDistributionType("string")
                .build())
            .build())
        .inputMode("string")
        .recordWrapperType("string")
        .shuffleConfig(TrainingJobInputDataConfigShuffleConfigArgs.builder()
            .seed(0)
            .build())
        .build())
    .mlflowConfig(TrainingJobMlflowConfigArgs.builder()
        .mlflowResourceArn("string")
        .mlflowExperimentName("string")
        .mlflowRunName("string")
        .build())
    .modelPackageConfig(TrainingJobModelPackageConfigArgs.builder()
        .modelPackageGroupArn("string")
        .sourceModelPackageArn("string")
        .build())
    .debugRuleConfigurations(TrainingJobDebugRuleConfigurationArgs.builder()
        .ruleConfigurationName("string")
        .ruleEvaluatorImage("string")
        .instanceType("string")
        .localPath("string")
        .ruleParameters(Map.of("string", "string"))
        .s3OutputPath("string")
        .volumeSizeInGb(0)
        .build())
    .algorithmSpecification(TrainingJobAlgorithmSpecificationArgs.builder()
        .algorithmName("string")
        .containerArguments("string")
        .containerEntrypoints("string")
        .enableSagemakerMetricsTimeSeries(false)
        .metricDefinitions(TrainingJobAlgorithmSpecificationMetricDefinitionArgs.builder()
            .name("string")
            .regex("string")
            .build())
        .trainingImage("string")
        .trainingImageConfig(TrainingJobAlgorithmSpecificationTrainingImageConfigArgs.builder()
            .trainingRepositoryAccessMode("string")
            .trainingRepositoryAuthConfig(TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs.builder()
                .trainingRepositoryCredentialsProviderArn("string")
                .build())
            .build())
        .trainingInputMode("string")
        .build())
    .serverlessJobConfig(TrainingJobServerlessJobConfigArgs.builder()
        .baseModelArn("string")
        .jobType("string")
        .acceptEula(false)
        .customizationTechnique("string")
        .evaluationType("string")
        .evaluatorArn("string")
        .peft("string")
        .build())
    .region("string")
    .remoteDebugConfig(TrainingJobRemoteDebugConfigArgs.builder()
        .enableRemoteDebug(false)
        .build())
    .resourceConfig(TrainingJobResourceConfigArgs.builder()
        .instanceCount(0)
        .instanceGroups(TrainingJobResourceConfigInstanceGroupArgs.builder()
            .instanceCount(0)
            .instanceGroupName("string")
            .instanceType("string")
            .build())
        .instancePlacementConfig(TrainingJobResourceConfigInstancePlacementConfigArgs.builder()
            .enableMultipleJobs(false)
            .placementSpecifications(TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs.builder()
                .instanceCount(0)
                .ultraServerId("string")
                .build())
            .build())
        .instanceType("string")
        .keepAlivePeriodInSeconds(0)
        .trainingPlanArn("string")
        .volumeKmsKeyId("string")
        .volumeSizeInGb(0)
        .build())
    .profilerConfig(TrainingJobProfilerConfigArgs.builder()
        .disableProfiler(false)
        .profilingIntervalInMilliseconds(0)
        .profilingParameters(Map.of("string", "string"))
        .s3OutputPath("string")
        .build())
    .debugHookConfig(TrainingJobDebugHookConfigArgs.builder()
        .s3OutputPath("string")
        .collectionConfigurations(TrainingJobDebugHookConfigCollectionConfigurationArgs.builder()
            .collectionName("string")
            .collectionParameters(Map.of("string", "string"))
            .build())
        .hookParameters(Map.of("string", "string"))
        .localPath("string")
        .build())
    .profilerRuleConfigurations(TrainingJobProfilerRuleConfigurationArgs.builder()
        .ruleConfigurationName("string")
        .ruleEvaluatorImage("string")
        .instanceType("string")
        .localPath("string")
        .ruleParameters(Map.of("string", "string"))
        .s3OutputPath("string")
        .volumeSizeInGb(0)
        .build())
    .sessionChainingConfig(TrainingJobSessionChainingConfigArgs.builder()
        .enableSessionTagChaining(false)
        .build())
    .stoppingCondition(TrainingJobStoppingConditionArgs.builder()
        .maxPendingTimeInSeconds(0)
        .maxRuntimeInSeconds(0)
        .maxWaitTimeInSeconds(0)
        .build())
    .tags(Map.of("string", "string"))
    .tensorBoardOutputConfig(TrainingJobTensorBoardOutputConfigArgs.builder()
        .s3OutputPath("string")
        .localPath("string")
        .build())
    // Placeholder strings; the accepted duration format is not shown in this sample.
    .timeouts(TrainingJobTimeoutsArgs.builder()
        .create("string")
        .delete("string")
        .update("string")
        .build())
    .checkpointConfig(TrainingJobCheckpointConfigArgs.builder()
        .s3Uri("string")
        .localPath("string")
        .build())
    .vpcConfig(TrainingJobVpcConfigArgs.builder()
        .securityGroupIds("string")
        .subnets("string")
        .build())
    .build());

# Python reference example: constructs a TrainingJob with every input property set to a
# placeholder ("string", 0, False). Nested inputs are written as plain dict literals with
# snake_case keys. Replace the values and drop the arguments you do not need.
training_job_resource = aws.sagemaker.TrainingJob("trainingJobResource",
    role_arn="string",
    training_job_name="string",
    output_data_config={
        "s3_output_path": "string",
        "compression_type": "string",
        "kms_key_id": "string",
    },
    retry_strategy={
        "maximum_retry_attempts": 0,
    },
    delete_model_packages_on_destroy=False,
    delete_vpc_enis_on_destroy=False,
    enable_inter_container_traffic_encryption=False,
    enable_managed_spot_training=False,
    enable_network_isolation=False,
    # Map-valued input; a single placeholder entry is shown.
    environment={
        "string": "string",
    },
    experiment_config={
        "experiment_name": "string",
        "run_name": "string",
        "trial_component_display_name": "string",
        "trial_name": "string",
    },
    hyper_parameters={
        "string": "string",
    },
    infra_check_config={
        "enable_infra_check": False,
    },
    # List-valued input; a single placeholder element is shown.
    input_data_configs=[{
        "channel_name": "string",
        "compression_type": "string",
        "content_type": "string",
        "data_source": {
            # NOTE(review): file_system_data_source and s3_data_source are both populated
            # because this sample enumerates every property; whether a real channel may
            # set both is not shown here — confirm before copying.
            "file_system_data_source": {
                "directory_path": "string",
                "file_system_access_mode": "string",
                "file_system_id": "string",
                "file_system_type": "string",
            },
            "s3_data_source": {
                "s3_data_type": "string",
                "s3_uri": "string",
                "attribute_names": ["string"],
                "hub_access_config": {
                    "hub_content_arn": "string",
                },
                "instance_group_names": ["string"],
                "model_access_config": {
                    "accept_eula": False,
                },
                "s3_data_distribution_type": "string",
            },
        },
        "input_mode": "string",
        "record_wrapper_type": "string",
        "shuffle_config": {
            "seed": 0,
        },
    }],
    mlflow_config={
        "mlflow_resource_arn": "string",
        "mlflow_experiment_name": "string",
        "mlflow_run_name": "string",
    },
    model_package_config={
        "model_package_group_arn": "string",
        "source_model_package_arn": "string",
    },
    debug_rule_configurations=[{
        "rule_configuration_name": "string",
        "rule_evaluator_image": "string",
        "instance_type": "string",
        "local_path": "string",
        "rule_parameters": {
            "string": "string",
        },
        "s3_output_path": "string",
        "volume_size_in_gb": 0,
    }],
    algorithm_specification={
        "algorithm_name": "string",
        "container_arguments": ["string"],
        "container_entrypoints": ["string"],
        "enable_sagemaker_metrics_time_series": False,
        "metric_definitions": [{
            "name": "string",
            "regex": "string",
        }],
        "training_image": "string",
        "training_image_config": {
            "training_repository_access_mode": "string",
            "training_repository_auth_config": {
                "training_repository_credentials_provider_arn": "string",
            },
        },
        "training_input_mode": "string",
    },
    serverless_job_config={
        "base_model_arn": "string",
        "job_type": "string",
        "accept_eula": False,
        "customization_technique": "string",
        "evaluation_type": "string",
        "evaluator_arn": "string",
        "peft": "string",
    },
    region="string",
    remote_debug_config={
        "enable_remote_debug": False,
    },
    resource_config={
        "instance_count": 0,
        "instance_groups": [{
            "instance_count": 0,
            "instance_group_name": "string",
            "instance_type": "string",
        }],
        "instance_placement_config": {
            "enable_multiple_jobs": False,
            "placement_specifications": [{
                "instance_count": 0,
                "ultra_server_id": "string",
            }],
        },
        "instance_type": "string",
        "keep_alive_period_in_seconds": 0,
        "training_plan_arn": "string",
        "volume_kms_key_id": "string",
        "volume_size_in_gb": 0,
    },
    profiler_config={
        "disable_profiler": False,
        "profiling_interval_in_milliseconds": 0,
        "profiling_parameters": {
            "string": "string",
        },
        "s3_output_path": "string",
    },
    debug_hook_config={
        "s3_output_path": "string",
        "collection_configurations": [{
            "collection_name": "string",
            "collection_parameters": {
                "string": "string",
            },
        }],
        "hook_parameters": {
            "string": "string",
        },
        "local_path": "string",
    },
    profiler_rule_configurations=[{
        "rule_configuration_name": "string",
        "rule_evaluator_image": "string",
        "instance_type": "string",
        "local_path": "string",
        "rule_parameters": {
            "string": "string",
        },
        "s3_output_path": "string",
        "volume_size_in_gb": 0,
    }],
    session_chaining_config={
        "enable_session_tag_chaining": False,
    },
    stopping_condition={
        "max_pending_time_in_seconds": 0,
        "max_runtime_in_seconds": 0,
        "max_wait_time_in_seconds": 0,
    },
    tags={
        "string": "string",
    },
    tensor_board_output_config={
        "s3_output_path": "string",
        "local_path": "string",
    },
    # Placeholder strings; the accepted duration format is not shown in this sample.
    timeouts={
        "create": "string",
        "delete": "string",
        "update": "string",
    },
    checkpoint_config={
        "s3_uri": "string",
        "local_path": "string",
    },
    vpc_config={
        "security_group_ids": ["string"],
        "subnets": ["string"],
    })

// Constructor reference for aws.sagemaker.TrainingJob. Every input property is
// listed once with a type placeholder ("string", 0, false); substitute real
// values before use.
//
// NOTE: this is a property catalogue, not a deployable combination. Per the
// input property reference, serverlessJobConfig conflicts with
// algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy,
// checkpointConfig, debugHookConfig, experimentConfig, profilerConfig,
// profilerRuleConfigurations and tensorBoardOutputConfig, while
// modelPackageConfig requires serverlessJobConfig.
const trainingJobResource = new aws.sagemaker.TrainingJob("trainingJobResource", {
    // roleArn and trainingJobName lead, mirroring the input property list.
    roleArn: "string",
    trainingJobName: "string",

    // All remaining inputs follow in alphabetical order (nested keys too).
    algorithmSpecification: {
        algorithmName: "string",
        containerArguments: ["string"],
        containerEntrypoints: ["string"],
        enableSagemakerMetricsTimeSeries: false,
        metricDefinitions: [{ name: "string", regex: "string" }],
        trainingImage: "string",
        trainingImageConfig: {
            trainingRepositoryAccessMode: "string",
            trainingRepositoryAuthConfig: { trainingRepositoryCredentialsProviderArn: "string" },
        },
        trainingInputMode: "string",
    },
    checkpointConfig: {
        localPath: "string",
        s3Uri: "string",
    },
    debugHookConfig: {
        collectionConfigurations: [{
            collectionName: "string",
            collectionParameters: { string: "string" },
        }],
        hookParameters: { string: "string" },
        localPath: "string",
        s3OutputPath: "string",
    },
    debugRuleConfigurations: [{
        instanceType: "string",
        localPath: "string",
        ruleConfigurationName: "string",
        ruleEvaluatorImage: "string",
        ruleParameters: { string: "string" },
        s3OutputPath: "string",
        volumeSizeInGb: 0,
    }],
    deleteModelPackagesOnDestroy: false,
    deleteVpcEnisOnDestroy: false,
    enableInterContainerTrafficEncryption: false,
    enableManagedSpotTraining: false,
    enableNetworkIsolation: false,
    environment: { string: "string" },
    experimentConfig: {
        experimentName: "string",
        runName: "string",
        trialComponentDisplayName: "string",
        trialName: "string",
    },
    hyperParameters: { string: "string" },
    infraCheckConfig: { enableInfraCheck: false },
    inputDataConfigs: [{
        channelName: "string",
        compressionType: "string",
        contentType: "string",
        dataSource: {
            fileSystemDataSource: {
                directoryPath: "string",
                fileSystemAccessMode: "string",
                fileSystemId: "string",
                fileSystemType: "string",
            },
            s3DataSource: {
                attributeNames: ["string"],
                hubAccessConfig: { hubContentArn: "string" },
                instanceGroupNames: ["string"],
                modelAccessConfig: { acceptEula: false },
                s3DataDistributionType: "string",
                s3DataType: "string",
                s3Uri: "string",
            },
        },
        inputMode: "string",
        recordWrapperType: "string",
        shuffleConfig: { seed: 0 },
    }],
    mlflowConfig: {
        mlflowExperimentName: "string",
        mlflowResourceArn: "string",
        mlflowRunName: "string",
    },
    modelPackageConfig: {
        modelPackageGroupArn: "string",
        sourceModelPackageArn: "string",
    },
    outputDataConfig: {
        compressionType: "string",
        kmsKeyId: "string",
        s3OutputPath: "string",
    },
    profilerConfig: {
        disableProfiler: false,
        profilingIntervalInMilliseconds: 0,
        profilingParameters: { string: "string" },
        s3OutputPath: "string",
    },
    profilerRuleConfigurations: [{
        instanceType: "string",
        localPath: "string",
        ruleConfigurationName: "string",
        ruleEvaluatorImage: "string",
        ruleParameters: { string: "string" },
        s3OutputPath: "string",
        volumeSizeInGb: 0,
    }],
    region: "string",
    remoteDebugConfig: { enableRemoteDebug: false },
    resourceConfig: {
        instanceCount: 0,
        instanceGroups: [{
            instanceCount: 0,
            instanceGroupName: "string",
            instanceType: "string",
        }],
        instancePlacementConfig: {
            enableMultipleJobs: false,
            placementSpecifications: [{ instanceCount: 0, ultraServerId: "string" }],
        },
        instanceType: "string",
        keepAlivePeriodInSeconds: 0,
        trainingPlanArn: "string",
        volumeKmsKeyId: "string",
        volumeSizeInGb: 0,
    },
    retryStrategy: { maximumRetryAttempts: 0 },
    serverlessJobConfig: {
        acceptEula: false,
        baseModelArn: "string",
        customizationTechnique: "string",
        evaluationType: "string",
        evaluatorArn: "string",
        jobType: "string",
        peft: "string",
    },
    sessionChainingConfig: { enableSessionTagChaining: false },
    stoppingCondition: {
        maxPendingTimeInSeconds: 0,
        maxRuntimeInSeconds: 0,
        maxWaitTimeInSeconds: 0,
    },
    tags: { string: "string" },
    tensorBoardOutputConfig: {
        localPath: "string",
        s3OutputPath: "string",
    },
    timeouts: {
        create: "string",
        delete: "string",
        update: "string",
    },
    vpcConfig: {
        securityGroupIds: ["string"],
        subnets: ["string"],
    },
});

# Constructor reference for aws:sagemaker:TrainingJob. Every input property is
# listed once with a type placeholder (string, 0, false); substitute real values
# before use.
#
# NOTE: this is a property catalogue, not a deployable combination. Per the
# input property reference, serverlessJobConfig conflicts with
# algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy,
# checkpointConfig, debugHookConfig, experimentConfig, profilerConfig,
# profilerRuleConfigurations and tensorBoardOutputConfig, while
# modelPackageConfig requires serverlessJobConfig.
type: aws:sagemaker:TrainingJob
properties:
    # roleArn and trainingJobName lead, mirroring the input property list.
    roleArn: string
    trainingJobName: string

    # All remaining inputs follow in alphabetical order.
    algorithmSpecification:
        algorithmName: string
        containerArguments: [string]
        containerEntrypoints: [string]
        enableSagemakerMetricsTimeSeries: false
        metricDefinitions:
            - name: string
              regex: string
        trainingImage: string
        trainingImageConfig:
            trainingRepositoryAccessMode: string
            trainingRepositoryAuthConfig:
                trainingRepositoryCredentialsProviderArn: string
        trainingInputMode: string
    checkpointConfig:
        localPath: string
        s3Uri: string
    debugHookConfig:
        collectionConfigurations:
            - collectionName: string
              collectionParameters:
                string: string
        hookParameters:
            string: string
        localPath: string
        s3OutputPath: string
    debugRuleConfigurations:
        - instanceType: string
          localPath: string
          ruleConfigurationName: string
          ruleEvaluatorImage: string
          ruleParameters:
            string: string
          s3OutputPath: string
          volumeSizeInGb: 0
    deleteModelPackagesOnDestroy: false
    deleteVpcEnisOnDestroy: false
    enableInterContainerTrafficEncryption: false
    enableManagedSpotTraining: false
    enableNetworkIsolation: false
    environment:
        string: string
    experimentConfig:
        experimentName: string
        runName: string
        trialComponentDisplayName: string
        trialName: string
    hyperParameters:
        string: string
    infraCheckConfig:
        enableInfraCheck: false
    inputDataConfigs:
        - channelName: string
          compressionType: string
          contentType: string
          dataSource:
            fileSystemDataSource:
                directoryPath: string
                fileSystemAccessMode: string
                fileSystemId: string
                fileSystemType: string
            s3DataSource:
                attributeNames: [string]
                hubAccessConfig:
                    hubContentArn: string
                instanceGroupNames: [string]
                modelAccessConfig:
                    acceptEula: false
                s3DataDistributionType: string
                s3DataType: string
                s3Uri: string
          inputMode: string
          recordWrapperType: string
          shuffleConfig:
            seed: 0
    mlflowConfig:
        mlflowExperimentName: string
        mlflowResourceArn: string
        mlflowRunName: string
    modelPackageConfig:
        modelPackageGroupArn: string
        sourceModelPackageArn: string
    outputDataConfig:
        compressionType: string
        kmsKeyId: string
        s3OutputPath: string
    profilerConfig:
        disableProfiler: false
        profilingIntervalInMilliseconds: 0
        profilingParameters:
            string: string
        s3OutputPath: string
    profilerRuleConfigurations:
        - instanceType: string
          localPath: string
          ruleConfigurationName: string
          ruleEvaluatorImage: string
          ruleParameters:
            string: string
          s3OutputPath: string
          volumeSizeInGb: 0
    region: string
    remoteDebugConfig:
        enableRemoteDebug: false
    resourceConfig:
        instanceCount: 0
        instanceGroups:
            - instanceCount: 0
              instanceGroupName: string
              instanceType: string
        instancePlacementConfig:
            enableMultipleJobs: false
            placementSpecifications:
                - instanceCount: 0
                  ultraServerId: string
        instanceType: string
        keepAlivePeriodInSeconds: 0
        trainingPlanArn: string
        volumeKmsKeyId: string
        volumeSizeInGb: 0
    retryStrategy:
        maximumRetryAttempts: 0
    serverlessJobConfig:
        acceptEula: false
        baseModelArn: string
        customizationTechnique: string
        evaluationType: string
        evaluatorArn: string
        jobType: string
        peft: string
    sessionChainingConfig:
        enableSessionTagChaining: false
    stoppingCondition:
        maxPendingTimeInSeconds: 0
        maxRuntimeInSeconds: 0
        maxWaitTimeInSeconds: 0
    tags:
        string: string
    tensorBoardOutputConfig:
        localPath: string
        s3OutputPath: string
    timeouts:
        create: string
        delete: string
        update: string
    vpcConfig:
        securityGroupIds: [string]
        subnets: [string]

TrainingJob Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

Inputs

In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

The TrainingJob resource accepts the following input properties:

RoleArn string

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

TrainingJobName string

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

AlgorithmSpecification TrainingJobAlgorithmSpecification

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

CheckpointConfig TrainingJobCheckpointConfig

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

DebugHookConfig TrainingJobDebugHookConfig

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

DebugRuleConfigurations List<TrainingJobDebugRuleConfiguration>

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

DeleteModelPackagesOnDestroy bool

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

DeleteVpcEnisOnDestroy bool

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

EnableInterContainerTrafficEncryption bool

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

EnableManagedSpotTraining bool

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

EnableNetworkIsolation bool

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

Environment Dictionary<string, string>

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

ExperimentConfig TrainingJobExperimentConfig

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

HyperParameters Dictionary<string, string>

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

InfraCheckConfig TrainingJobInfraCheckConfig

Infrastructure health check configuration. See infraCheckConfig below.

InputDataConfigs List<TrainingJobInputDataConfig>

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

MlflowConfig TrainingJobMlflowConfig

MLflow integration configuration. See mlflowConfig below.

ModelPackageConfig TrainingJobModelPackageConfig

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

OutputDataConfig TrainingJobOutputDataConfig

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

ProfilerConfig TrainingJobProfilerConfig

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

ProfilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

Region string

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

RemoteDebugConfig TrainingJobRemoteDebugConfig

Configuration for remote debugging. See remoteDebugConfig below.

ResourceConfig TrainingJobResourceConfig

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

RetryStrategy TrainingJobRetryStrategy

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

ServerlessJobConfig TrainingJobServerlessJobConfig

Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.

SessionChainingConfig TrainingJobSessionChainingConfig

Configuration for session tag chaining. See sessionChainingConfig below.

StoppingCondition TrainingJobStoppingCondition

Tags Dictionary<string, string>

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

TensorBoardOutputConfig TrainingJobTensorBoardOutputConfig

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

Timeouts TrainingJobTimeouts

VpcConfig TrainingJobVpcConfig

VPC configuration for the training job. See vpcConfig below.

RoleArn string

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

TrainingJobName string

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

AlgorithmSpecification TrainingJobAlgorithmSpecificationArgs

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

CheckpointConfig TrainingJobCheckpointConfigArgs

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

DebugHookConfig TrainingJobDebugHookConfigArgs

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

DebugRuleConfigurations []TrainingJobDebugRuleConfigurationArgs

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

DeleteModelPackagesOnDestroy bool

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

DeleteVpcEnisOnDestroy bool

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

EnableInterContainerTrafficEncryption bool

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

EnableManagedSpotTraining bool

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

EnableNetworkIsolation bool

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

Environment map[string]string

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

ExperimentConfig TrainingJobExperimentConfigArgs

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

HyperParameters map[string]string

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

InfraCheckConfig TrainingJobInfraCheckConfigArgs

Infrastructure health check configuration. See infraCheckConfig below.

InputDataConfigs []TrainingJobInputDataConfigArgs

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

MlflowConfig TrainingJobMlflowConfigArgs

MLflow integration configuration. See mlflowConfig below.

ModelPackageConfig TrainingJobModelPackageConfigArgs

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

OutputDataConfig TrainingJobOutputDataConfigArgs

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

ProfilerConfig TrainingJobProfilerConfigArgs

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

ProfilerRuleConfigurations []TrainingJobProfilerRuleConfigurationArgs

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

Region string

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

RemoteDebugConfig TrainingJobRemoteDebugConfigArgs

Configuration for remote debugging. See remoteDebugConfig below.

ResourceConfig TrainingJobResourceConfigArgs

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

RetryStrategy TrainingJobRetryStrategyArgs

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

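ServerlessJobConfig TrainingJobServerlessJobConfigArgs

Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.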

SessionChainingConfig TrainingJobSessionChainingConfigArgs

Configuration for session tag chaining. See sessionChainingConfig below.

StoppingCondition TrainingJobStoppingConditionArgs

Tags map[string]string

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

TensorBoardOutputConfig TrainingJobTensorBoardOutputConfigArgs

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

Timeouts TrainingJobTimeoutsArgs

VpcConfig TrainingJobVpcConfigArgs

VPC configuration for the training job. See vpcConfig below.

roleArn String

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

trainingJobName String

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

algorithmSpecification TrainingJobAlgorithmSpecification

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

checkpointConfig TrainingJobCheckpointConfig

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debugHookConfig TrainingJobDebugHookConfig

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

debugRuleConfigurations List<TrainingJobDebugRuleConfiguration>

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

deleteModelPackagesOnDestroy Boolean

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

deleteVpcEnisOnDestroy Boolean

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enableInterContainerTrafficEncryption Boolean

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enableManagedSpotTraining Boolean

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enableNetworkIsolation Boolean

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment Map<String,String>

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experimentConfig TrainingJobExperimentConfig

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyperParameters Map<String,String>

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infraCheckConfig TrainingJobInfraCheckConfig

Infrastructure health check configuration. See infraCheckConfig below.

inputDataConfigs List<TrainingJobInputDataConfig>

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflowConfig TrainingJobMlflowConfig

MLflow integration configuration. See mlflowConfig below.

modelPackageConfig TrainingJobModelPackageConfig

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

outputDataConfig TrainingJobOutputDataConfig

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profilerConfig TrainingJobProfilerConfig

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region String

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remoteDebugConfig TrainingJobRemoteDebugConfig

Configuration for remote debugging. See remoteDebugConfig below.

resourceConfig TrainingJobResourceConfig

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retryStrategy TrainingJobRetryStrategy

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

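serverlessJobConfig TrainingJobServerlessJobConfig

Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.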

sessionChainingConfig TrainingJobSessionChainingConfig

Configuration for session tag chaining. See sessionChainingConfig below.

stoppingCondition TrainingJobStoppingCondition

tags Map<String,String>

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts TrainingJobTimeouts

vpcConfig TrainingJobVpcConfig

VPC configuration for the training job. See vpcConfig below.

roleArn string

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

trainingJobName string

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

algorithmSpecification TrainingJobAlgorithmSpecification

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

checkpointConfig TrainingJobCheckpointConfig

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debugHookConfig TrainingJobDebugHookConfig

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

debugRuleConfigurations TrainingJobDebugRuleConfiguration[]

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

deleteModelPackagesOnDestroy boolean

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

deleteVpcEnisOnDestroy boolean

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enableInterContainerTrafficEncryption boolean

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enableManagedSpotTraining boolean

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enableNetworkIsolation boolean

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment {[key: string]: string}

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experimentConfig TrainingJobExperimentConfig

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyperParameters {[key: string]: string}

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infraCheckConfig TrainingJobInfraCheckConfig

Infrastructure health check configuration. See infraCheckConfig below.

inputDataConfigs TrainingJobInputDataConfig[]

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflowConfig TrainingJobMlflowConfig

MLflow integration configuration. See mlflowConfig below.

modelPackageConfig TrainingJobModelPackageConfig

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

outputDataConfig TrainingJobOutputDataConfig

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profilerConfig TrainingJobProfilerConfig

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profilerRuleConfigurations TrainingJobProfilerRuleConfiguration[]

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region string

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remoteDebugConfig TrainingJobRemoteDebugConfig

Configuration for remote debugging. See remoteDebugConfig below.

resourceConfig TrainingJobResourceConfig

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retryStrategy TrainingJobRetryStrategy

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

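serverlessJobConfig TrainingJobServerlessJobConfig

Configuration for serverless training jobs using foundation models. Conflicts with algorithmSpecification, enableManagedSpotTraining, environment, retryStrategy, checkpointConfig, debugHookConfig, experimentConfig, profilerConfig, profilerRuleConfigurations, and tensorBoardOutputConfig. See serverlessJobConfig below.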

sessionChainingConfig TrainingJobSessionChainingConfig

Configuration for session tag chaining. See sessionChainingConfig below.

stoppingCondition TrainingJobStoppingCondition

tags {[key: string]: string}

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts TrainingJobTimeouts

vpcConfig TrainingJobVpcConfig

VPC configuration for the training job. See vpcConfig below.

role_arn str

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

training_job_name str

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

algorithm_specification TrainingJobAlgorithmSpecificationArgs

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

checkpoint_config TrainingJobCheckpointConfigArgs

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debug_hook_config TrainingJobDebugHookConfigArgs

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

debug_rule_configurations Sequence[TrainingJobDebugRuleConfigurationArgs]

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

delete_model_packages_on_destroy bool

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

delete_vpc_enis_on_destroy bool

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enable_inter_container_traffic_encryption bool

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enable_managed_spot_training bool

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enable_network_isolation bool

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment Mapping[str, str]

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experiment_config TrainingJobExperimentConfigArgs

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyper_parameters Mapping[str, str]

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infra_check_config TrainingJobInfraCheckConfigArgs

Infrastructure health check configuration. See infraCheckConfig below.

input_data_configs Sequence[TrainingJobInputDataConfigArgs]

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflow_config TrainingJobMlflowConfigArgs

MLflow integration configuration. See mlflowConfig below.

model_package_config TrainingJobModelPackageConfigArgs

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

output_data_config TrainingJobOutputDataConfigArgs

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profiler_config TrainingJobProfilerConfigArgs

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profiler_rule_configurations Sequence[TrainingJobProfilerRuleConfigurationArgs]

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region str

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remote_debug_config TrainingJobRemoteDebugConfigArgs

Configuration for remote debugging. See remoteDebugConfig below.

resource_config TrainingJobResourceConfigArgs

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retry_strategy TrainingJobRetryStrategyArgs

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

serverless_job_config TrainingJobServerlessJobConfigArgs

session_chaining_config TrainingJobSessionChainingConfigArgs

Configuration for session tag chaining. See sessionChainingConfig below.

stopping_condition TrainingJobStoppingConditionArgs

tags Mapping[str, str]

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tensor_board_output_config TrainingJobTensorBoardOutputConfigArgs

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts TrainingJobTimeoutsArgs

vpc_config TrainingJobVpcConfigArgs

VPC configuration for the training job. See vpcConfig below.

roleArn String

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

trainingJobName String

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

algorithmSpecification Property Map

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

checkpointConfig Property Map

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debugHookConfig Property Map

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

debugRuleConfigurations List<Property Map>

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

deleteModelPackagesOnDestroy Boolean

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

deleteVpcEnisOnDestroy Boolean

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enableInterContainerTrafficEncryption Boolean

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enableManagedSpotTraining Boolean

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enableNetworkIsolation Boolean

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment Map<String>

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experimentConfig Property Map

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyperParameters Map<String>

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infraCheckConfig Property Map

Infrastructure health check configuration. See infraCheckConfig below.

inputDataConfigs List<Property Map>

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflowConfig Property Map

MLflow integration configuration. See mlflowConfig below.

modelPackageConfig Property Map

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

outputDataConfig Property Map

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profilerConfig Property Map

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profilerRuleConfigurations List<Property Map>

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region String

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remoteDebugConfig Property Map

Configuration for remote debugging. See remoteDebugConfig below.

resourceConfig Property Map

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retryStrategy Property Map

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

serverlessJobConfig Property Map

sessionChainingConfig Property Map

Configuration for session tag chaining. See sessionChainingConfig below.

stoppingCondition Property Map

tags Map<String>

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tensorBoardOutputConfig Property Map

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts Property Map

vpcConfig Property Map

VPC configuration for the training job. See vpcConfig below.

Outputs

All input properties are implicitly available as output properties. Additionally, the TrainingJob resource produces the following output properties:

Arn string: ARN of the Training Job.
Id string: The provider-assigned unique ID for this managed resource.
TagsAll Dictionary<string, string>: Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

Arn string: ARN of the Training Job.
Id string: The provider-assigned unique ID for this managed resource.
TagsAll map[string]string: Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

arn String: ARN of the Training Job.
id String: The provider-assigned unique ID for this managed resource.
tagsAll Map<String,String>: Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

arn string: ARN of the Training Job.
id string: The provider-assigned unique ID for this managed resource.
tagsAll {[key: string]: string}: Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

arn str: ARN of the Training Job.
id str: The provider-assigned unique ID for this managed resource.
tags_all Mapping[str, str]: Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

arn String: ARN of the Training Job.
id String: The provider-assigned unique ID for this managed resource.
tagsAll Map<String>: Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

Look up Existing TrainingJob Resource

Get an existing TrainingJob resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: TrainingJobState, opts?: CustomResourceOptions): TrainingJob

@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        algorithm_specification: Optional[TrainingJobAlgorithmSpecificationArgs] = None,
        arn: Optional[str] = None,
        checkpoint_config: Optional[TrainingJobCheckpointConfigArgs] = None,
        debug_hook_config: Optional[TrainingJobDebugHookConfigArgs] = None,
        debug_rule_configurations: Optional[Sequence[TrainingJobDebugRuleConfigurationArgs]] = None,
        delete_model_packages_on_destroy: Optional[bool] = None,
        delete_vpc_enis_on_destroy: Optional[bool] = None,
        enable_inter_container_traffic_encryption: Optional[bool] = None,
        enable_managed_spot_training: Optional[bool] = None,
        enable_network_isolation: Optional[bool] = None,
        environment: Optional[Mapping[str, str]] = None,
        experiment_config: Optional[TrainingJobExperimentConfigArgs] = None,
        hyper_parameters: Optional[Mapping[str, str]] = None,
        infra_check_config: Optional[TrainingJobInfraCheckConfigArgs] = None,
        input_data_configs: Optional[Sequence[TrainingJobInputDataConfigArgs]] = None,
        mlflow_config: Optional[TrainingJobMlflowConfigArgs] = None,
        model_package_config: Optional[TrainingJobModelPackageConfigArgs] = None,
        output_data_config: Optional[TrainingJobOutputDataConfigArgs] = None,
        profiler_config: Optional[TrainingJobProfilerConfigArgs] = None,
        profiler_rule_configurations: Optional[Sequence[TrainingJobProfilerRuleConfigurationArgs]] = None,
        region: Optional[str] = None,
        remote_debug_config: Optional[TrainingJobRemoteDebugConfigArgs] = None,
        resource_config: Optional[TrainingJobResourceConfigArgs] = None,
        retry_strategy: Optional[TrainingJobRetryStrategyArgs] = None,
        role_arn: Optional[str] = None,
        serverless_job_config: Optional[TrainingJobServerlessJobConfigArgs] = None,
        session_chaining_config: Optional[TrainingJobSessionChainingConfigArgs] = None,
        stopping_condition: Optional[TrainingJobStoppingConditionArgs] = None,
        tags: Optional[Mapping[str, str]] = None,
        tags_all: Optional[Mapping[str, str]] = None,
        tensor_board_output_config: Optional[TrainingJobTensorBoardOutputConfigArgs] = None,
        timeouts: Optional[TrainingJobTimeoutsArgs] = None,
        training_job_name: Optional[str] = None,
        vpc_config: Optional[TrainingJobVpcConfigArgs] = None) -> TrainingJob

func GetTrainingJob(ctx *Context, name string, id IDInput, state *TrainingJobState, opts ...ResourceOption) (*TrainingJob, error)

public static TrainingJob Get(string name, Input<string> id, TrainingJobState? state, CustomResourceOptions? opts = null)

public static TrainingJob get(String name, Output<String> id, TrainingJobState state, CustomResourceOptions options)

resources:
  _:
    type: aws:sagemaker:TrainingJob
    get:
      id: ${id}

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

resource_name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

The following state arguments are supported:

AlgorithmSpecification TrainingJobAlgorithmSpecification

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

Arn string

ARN of the Training Job.

CheckpointConfig TrainingJobCheckpointConfig

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

DebugHookConfig TrainingJobDebugHookConfig

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

DebugRuleConfigurations List<TrainingJobDebugRuleConfiguration>

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

DeleteModelPackagesOnDestroy bool

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

DeleteVpcEnisOnDestroy bool

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

EnableInterContainerTrafficEncryption bool

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

EnableManagedSpotTraining bool

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

EnableNetworkIsolation bool

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

Environment Dictionary<string, string>

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

ExperimentConfig TrainingJobExperimentConfig

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

HyperParameters Dictionary<string, string>

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

InfraCheckConfig TrainingJobInfraCheckConfig

Infrastructure health check configuration. See infraCheckConfig below.

InputDataConfigs List<TrainingJobInputDataConfig>

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

MlflowConfig TrainingJobMlflowConfig

MLflow integration configuration. See mlflowConfig below.

ModelPackageConfig TrainingJobModelPackageConfig

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

OutputDataConfig TrainingJobOutputDataConfig

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

ProfilerConfig TrainingJobProfilerConfig

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

ProfilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

Region string

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

RemoteDebugConfig TrainingJobRemoteDebugConfig

Configuration for remote debugging. See remoteDebugConfig below.

ResourceConfig TrainingJobResourceConfig

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

RetryStrategy TrainingJobRetryStrategy

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

RoleArn string

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

ServerlessJobConfig TrainingJobServerlessJobConfig

SessionChainingConfig TrainingJobSessionChainingConfig

Configuration for session tag chaining. See sessionChainingConfig below.

StoppingCondition TrainingJobStoppingCondition

Tags Dictionary<string, string>

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

TagsAll Dictionary<string, string>

Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

TensorBoardOutputConfig TrainingJobTensorBoardOutputConfig

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

Timeouts TrainingJobTimeouts

TrainingJobName string

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

VpcConfig TrainingJobVpcConfig

VPC configuration for the training job. See vpcConfig below.

AlgorithmSpecification TrainingJobAlgorithmSpecificationArgs

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

Arn string

ARN of the Training Job.

CheckpointConfig TrainingJobCheckpointConfigArgs

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

DebugHookConfig TrainingJobDebugHookConfigArgs

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

DebugRuleConfigurations []TrainingJobDebugRuleConfigurationArgs

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

DeleteModelPackagesOnDestroy bool

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

DeleteVpcEnisOnDestroy bool

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

EnableInterContainerTrafficEncryption bool

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

EnableManagedSpotTraining bool

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

EnableNetworkIsolation bool

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

Environment map[string]string

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

ExperimentConfig TrainingJobExperimentConfigArgs

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

HyperParameters map[string]string

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

InfraCheckConfig TrainingJobInfraCheckConfigArgs

Infrastructure health check configuration. See infraCheckConfig below.

InputDataConfigs []TrainingJobInputDataConfigArgs

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

MlflowConfig TrainingJobMlflowConfigArgs

MLflow integration configuration. See mlflowConfig below.

ModelPackageConfig TrainingJobModelPackageConfigArgs

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

OutputDataConfig TrainingJobOutputDataConfigArgs

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

ProfilerConfig TrainingJobProfilerConfigArgs

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

ProfilerRuleConfigurations []TrainingJobProfilerRuleConfigurationArgs

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

Region string

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

RemoteDebugConfig TrainingJobRemoteDebugConfigArgs

Configuration for remote debugging. See remoteDebugConfig below.

ResourceConfig TrainingJobResourceConfigArgs

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

RetryStrategy TrainingJobRetryStrategyArgs

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

RoleArn string

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

ServerlessJobConfig TrainingJobServerlessJobConfigArgs

SessionChainingConfig TrainingJobSessionChainingConfigArgs

Configuration for session tag chaining. See sessionChainingConfig below.

StoppingCondition TrainingJobStoppingConditionArgs

Tags map[string]string

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

TagsAll map[string]string

Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

TensorBoardOutputConfig TrainingJobTensorBoardOutputConfigArgs

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

Timeouts TrainingJobTimeoutsArgs

TrainingJobName string

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

VpcConfig TrainingJobVpcConfigArgs

VPC configuration for the training job. See vpcConfig below.

algorithmSpecification TrainingJobAlgorithmSpecification

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

arn String

ARN of the Training Job.

checkpointConfig TrainingJobCheckpointConfig

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debugHookConfig TrainingJobDebugHookConfig

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

debugRuleConfigurations List<TrainingJobDebugRuleConfiguration>

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

deleteModelPackagesOnDestroy Boolean

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

deleteVpcEnisOnDestroy Boolean

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enableInterContainerTrafficEncryption Boolean

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enableManagedSpotTraining Boolean

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enableNetworkIsolation Boolean

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment Map<String,String>

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experimentConfig TrainingJobExperimentConfig

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyperParameters Map<String,String>

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infraCheckConfig TrainingJobInfraCheckConfig

Infrastructure health check configuration. See infraCheckConfig below.

inputDataConfigs List<TrainingJobInputDataConfig>

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflowConfig TrainingJobMlflowConfig

MLflow integration configuration. See mlflowConfig below.

modelPackageConfig TrainingJobModelPackageConfig

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

outputDataConfig TrainingJobOutputDataConfig

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profilerConfig TrainingJobProfilerConfig

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profilerRuleConfigurations List<TrainingJobProfilerRuleConfiguration>

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region String

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remoteDebugConfig TrainingJobRemoteDebugConfig

Configuration for remote debugging. See remoteDebugConfig below.

resourceConfig TrainingJobResourceConfig

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retryStrategy TrainingJobRetryStrategy

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

roleArn String

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

serverlessJobConfig TrainingJobServerlessJobConfig

sessionChainingConfig TrainingJobSessionChainingConfig

Configuration for session tag chaining. See sessionChainingConfig below.

stoppingCondition TrainingJobStoppingCondition

tags Map<String,String>

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tagsAll Map<String,String>

Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts TrainingJobTimeouts

trainingJobName String

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

vpcConfig TrainingJobVpcConfig

VPC configuration for the training job. See vpcConfig below.

algorithmSpecification TrainingJobAlgorithmSpecification

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

arn string

ARN of the Training Job.

checkpointConfig TrainingJobCheckpointConfig

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debugHookConfig TrainingJobDebugHookConfig

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

debugRuleConfigurations TrainingJobDebugRuleConfiguration[]

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

deleteModelPackagesOnDestroy boolean

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

deleteVpcEnisOnDestroy boolean

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enableInterContainerTrafficEncryption boolean

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enableManagedSpotTraining boolean

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enableNetworkIsolation boolean

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment {[key: string]: string}

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experimentConfig TrainingJobExperimentConfig

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyperParameters {[key: string]: string}

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infraCheckConfig TrainingJobInfraCheckConfig

Infrastructure health check configuration. See infraCheckConfig below.

inputDataConfigs TrainingJobInputDataConfig[]

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflowConfig TrainingJobMlflowConfig

MLflow integration configuration. See mlflowConfig below.

modelPackageConfig TrainingJobModelPackageConfig

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

outputDataConfig TrainingJobOutputDataConfig

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profilerConfig TrainingJobProfilerConfig

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profilerRuleConfigurations TrainingJobProfilerRuleConfiguration[]

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region string

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remoteDebugConfig TrainingJobRemoteDebugConfig

Configuration for remote debugging. See remoteDebugConfig below.

resourceConfig TrainingJobResourceConfig

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retryStrategy TrainingJobRetryStrategy

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

roleArn string

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

serverlessJobConfig TrainingJobServerlessJobConfig

sessionChainingConfig TrainingJobSessionChainingConfig

Configuration for session tag chaining. See sessionChainingConfig below.

stoppingCondition TrainingJobStoppingCondition

tags {[key: string]: string}

Map of tags to assign to the resource. If configured with a provider defaultTags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tagsAll {[key: string]: string}

Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

tensorBoardOutputConfig TrainingJobTensorBoardOutputConfig

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts TrainingJobTimeouts

trainingJobName string

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

vpcConfig TrainingJobVpcConfig

VPC configuration for the training job. See vpcConfig below.

algorithm_specification TrainingJobAlgorithmSpecificationArgs

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

arn str

ARN of the Training Job.

checkpoint_config TrainingJobCheckpointConfigArgs

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debug_hook_config TrainingJobDebugHookConfigArgs

Configuration for debugging rules. See debugHookConfig below. Conflicts with serverlessJobConfig.

debug_rule_configurations Sequence[TrainingJobDebugRuleConfigurationArgs]

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

delete_model_packages_on_destroy bool

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

delete_vpc_enis_on_destroy bool

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enable_inter_container_traffic_encryption bool

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enable_managed_spot_training bool

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enable_network_isolation bool

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment Mapping[str, str]

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experiment_config TrainingJobExperimentConfigArgs

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyper_parameters Mapping[str, str]

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infra_check_config TrainingJobInfraCheckConfigArgs

Infrastructure health check configuration. See infraCheckConfig below.

input_data_configs Sequence[TrainingJobInputDataConfigArgs]

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflow_config TrainingJobMlflowConfigArgs

MLflow integration configuration. See mlflowConfig below.

model_package_config TrainingJobModelPackageConfigArgs

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

output_data_config TrainingJobOutputDataConfigArgs

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profiler_config TrainingJobProfilerConfigArgs

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profiler_rule_configurations Sequence[TrainingJobProfilerRuleConfigurationArgs]

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region str

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remote_debug_config TrainingJobRemoteDebugConfigArgs

Configuration for remote debugging. See remoteDebugConfig below.

resource_config TrainingJobResourceConfigArgs

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retry_strategy TrainingJobRetryStrategyArgs

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

role_arn str

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

serverless_job_config TrainingJobServerlessJobConfigArgs

Serverless job configuration. See serverlessJobConfig below.

session_chaining_config TrainingJobSessionChainingConfigArgs

Configuration for session tag chaining. See sessionChainingConfig below.

stopping_condition TrainingJobStoppingConditionArgs

Stopping condition for the training job, such as the maximum runtime. See stoppingCondition below.

tags Mapping[str, str]

Map of tags to assign to the resource. If a provider defaultTags configuration block is present, tags with matching keys will overwrite those defined at the provider level.

tags_all Mapping[str, str]

Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

tensor_board_output_config TrainingJobTensorBoardOutputConfigArgs

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts TrainingJobTimeoutsArgs

training_job_name str

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

vpc_config TrainingJobVpcConfigArgs

VPC configuration for the training job. See vpcConfig below.

algorithmSpecification Property Map

Algorithm-related parameters of the training job. See algorithmSpecification below. Conflicts with serverlessJobConfig.

arn String

ARN of the Training Job.

checkpointConfig Property Map

Location of checkpoints during training. See checkpointConfig below. Conflicts with serverlessJobConfig.

debugHookConfig Property Map

Configuration for the debug hook, including where debug output is stored and which tensor collections are captured. See debugHookConfig below. Conflicts with serverlessJobConfig.

debugRuleConfigurations List<Property Map>

List of debug rule configurations. Maximum of 20. See debugRuleConfigurations below.

deleteModelPackagesOnDestroy Boolean

Whether to delete model packages in the configured model package group when the training job is destroyed. Default is false.

deleteVpcEnisOnDestroy Boolean

Whether to delete detached VPC ENIs SageMaker may leave behind when the training job is destroyed. Default is false.

enableInterContainerTrafficEncryption Boolean

Whether to encrypt inter-container traffic. When enabled, communications between containers are encrypted.

enableManagedSpotTraining Boolean

Whether to use managed spot training. Optimizes the cost of training by using Amazon EC2 Spot Instances. Conflicts with serverlessJobConfig.

enableNetworkIsolation Boolean

Whether to isolate the training container from the network. No inbound or outbound network calls can be made.

environment Map<String>

Map of environment variables to set in the training container. Maximum of 100 entries. Conflicts with serverlessJobConfig.

experimentConfig Property Map

Associates a SageMaker AI Experiment or Trial to the training job. See experimentConfig below. Conflicts with serverlessJobConfig.

hyperParameters Map<String>

Map of hyperparameters for the training algorithm. Maximum of 100 entries.

infraCheckConfig Property Map

Infrastructure health check configuration. See infraCheckConfig below.

inputDataConfigs List<Property Map>

List of input data channel configurations for the training job. Maximum of 20. See inputDataConfig below.

mlflowConfig Property Map

MLflow integration configuration. See mlflowConfig below.

modelPackageConfig Property Map

Model package configuration. Requires serverlessJobConfig. See modelPackageConfig below.

outputDataConfig Property Map

Location of the output data from the training job. See outputDataConfig below.

The following arguments are optional:

profilerConfig Property Map

Configuration for the profiler. See profilerConfig below. Conflicts with serverlessJobConfig.

profilerRuleConfigurations List<Property Map>

List of profiler rule configurations. Maximum of 20. See profilerRuleConfigurations below. Conflicts with serverlessJobConfig.

region String

Region where this resource will be managed. Defaults to the Region set in the provider configuration.

remoteDebugConfig Property Map

Configuration for remote debugging. See remoteDebugConfig below.

resourceConfig Property Map

Resources for the training job, including compute instances and storage volumes. See resourceConfig below.

retryStrategy Property Map

Number of times to retry the job if it fails. See retryStrategy below. Conflicts with serverlessJobConfig.

roleArn String

ARN of the IAM role that SageMaker AI assumes to perform tasks on your behalf during training.

serverlessJobConfig Property Map

Serverless job configuration. See serverlessJobConfig below.

sessionChainingConfig Property Map

Configuration for session tag chaining. See sessionChainingConfig below.

stoppingCondition Property Map

Stopping condition for the training job, such as the maximum runtime. See stoppingCondition below.

tags Map<String>

Map of tags to assign to the resource. If a provider defaultTags configuration block is present, tags with matching keys will overwrite those defined at the provider level.

tagsAll Map<String>

Map of tags assigned to the resource, including those inherited from the provider defaultTags configuration block.

tensorBoardOutputConfig Property Map

Configuration for TensorBoard output. See tensorBoardOutputConfig below. Conflicts with serverlessJobConfig.

timeouts Property Map

trainingJobName String

Name of the training job. Must be between 1 and 63 characters, start with a letter or number, and contain only letters, numbers, and hyphens.

vpcConfig Property Map

VPC configuration for the training job. See vpcConfig below.

Supporting Types

TrainingJobAlgorithmSpecification, TrainingJobAlgorithmSpecificationArgs

AlgorithmName string: Name or ARN of the algorithm resource to use for the training job.
ContainerArguments List<string>: List of arguments for the container entrypoint. Maximum of 100 entries.
ContainerEntrypoints List<string>: List of entrypoint commands for the container. Maximum of 100 entries.
EnableSagemakerMetricsTimeSeries bool: Whether to enable SageMaker AI metrics time series collection.
MetricDefinitions List<TrainingJobAlgorithmSpecificationMetricDefinition>: List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
TrainingImage string: Registry path of the Docker image that contains the training algorithm.
TrainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig: Training image configuration. See trainingImageConfig below.
TrainingInputMode string: Input mode for the training data. Valid values: File, Pipe, FastFile.

AlgorithmName string: Name or ARN of the algorithm resource to use for the training job.
ContainerArguments []string: List of arguments for the container entrypoint. Maximum of 100 entries.
ContainerEntrypoints []string: List of entrypoint commands for the container. Maximum of 100 entries.
EnableSagemakerMetricsTimeSeries bool: Whether to enable SageMaker AI metrics time series collection.
MetricDefinitions []TrainingJobAlgorithmSpecificationMetricDefinition: List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
TrainingImage string: Registry path of the Docker image that contains the training algorithm.
TrainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig: Training image configuration. See trainingImageConfig below.
TrainingInputMode string: Input mode for the training data. Valid values: File, Pipe, FastFile.

algorithmName String: Name or ARN of the algorithm resource to use for the training job.
containerArguments List<String>: List of arguments for the container entrypoint. Maximum of 100 entries.
containerEntrypoints List<String>: List of entrypoint commands for the container. Maximum of 100 entries.
enableSagemakerMetricsTimeSeries Boolean: Whether to enable SageMaker AI metrics time series collection.
metricDefinitions List<TrainingJobAlgorithmSpecificationMetricDefinition>: List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
trainingImage String: Registry path of the Docker image that contains the training algorithm.
trainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig: Training image configuration. See trainingImageConfig below.
trainingInputMode String: Input mode for the training data. Valid values: File, Pipe, FastFile.

algorithmName string: Name or ARN of the algorithm resource to use for the training job.
containerArguments string[]: List of arguments for the container entrypoint. Maximum of 100 entries.
containerEntrypoints string[]: List of entrypoint commands for the container. Maximum of 100 entries.
enableSagemakerMetricsTimeSeries boolean: Whether to enable SageMaker AI metrics time series collection.
metricDefinitions TrainingJobAlgorithmSpecificationMetricDefinition[]: List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
trainingImage string: Registry path of the Docker image that contains the training algorithm.
trainingImageConfig TrainingJobAlgorithmSpecificationTrainingImageConfig: Training image configuration. See trainingImageConfig below.
trainingInputMode string: Input mode for the training data. Valid values: File, Pipe, FastFile.

algorithm_name str: Name or ARN of the algorithm resource to use for the training job.
container_arguments Sequence[str]: List of arguments for the container entrypoint. Maximum of 100 entries.
container_entrypoints Sequence[str]: List of entrypoint commands for the container. Maximum of 100 entries.
enable_sagemaker_metrics_time_series bool: Whether to enable SageMaker AI metrics time series collection.
metric_definitions Sequence[TrainingJobAlgorithmSpecificationMetricDefinition]: List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
training_image str: Registry path of the Docker image that contains the training algorithm.
training_image_config TrainingJobAlgorithmSpecificationTrainingImageConfig: Training image configuration. See trainingImageConfig below.
training_input_mode str: Input mode for the training data. Valid values: File, Pipe, FastFile.

algorithmName String: Name or ARN of the algorithm resource to use for the training job.
containerArguments List<String>: List of arguments for the container entrypoint. Maximum of 100 entries.
containerEntrypoints List<String>: List of entrypoint commands for the container. Maximum of 100 entries.
enableSagemakerMetricsTimeSeries Boolean: Whether to enable SageMaker AI metrics time series collection.
metricDefinitions List<Property Map>: List of metric definitions for the training job. Maximum of 40. Use this to extract custom metrics from your own training container logs. SageMaker can still publish built-in metrics for built-in algorithms and supported prebuilt images when this block is omitted. See metricDefinitions below.
trainingImage String: Registry path of the Docker image that contains the training algorithm.
trainingImageConfig Property Map: Training image configuration. See trainingImageConfig below.
trainingInputMode String: Input mode for the training data. Valid values: File, Pipe, FastFile.

TrainingJobAlgorithmSpecificationMetricDefinition, TrainingJobAlgorithmSpecificationMetricDefinitionArgs

Name string: Name of the metric.
Regex string: Regular expression that searches the output of the training job and captures the value of the metric.

Name string: Name of the metric.
Regex string: Regular expression that searches the output of the training job and captures the value of the metric.

name String: Name of the metric.
regex String: Regular expression that searches the output of the training job and captures the value of the metric.

name string: Name of the metric.
regex string: Regular expression that searches the output of the training job and captures the value of the metric.

name str: Name of the metric.
regex str: Regular expression that searches the output of the training job and captures the value of the metric.

name String: Name of the metric.
regex String: Regular expression that searches the output of the training job and captures the value of the metric.

TrainingJobAlgorithmSpecificationTrainingImageConfig, TrainingJobAlgorithmSpecificationTrainingImageConfigArgs

TrainingRepositoryAccessMode string: Access mode for the training image repository.
TrainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig: Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.

TrainingRepositoryAccessMode string: Access mode for the training image repository.
TrainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig: Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.

trainingRepositoryAccessMode String: Access mode for the training image repository.
trainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig: Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.

trainingRepositoryAccessMode string: Access mode for the training image repository.
trainingRepositoryAuthConfig TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig: Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.

training_repository_access_mode str: Access mode for the training image repository.
training_repository_auth_config TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig: Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.

trainingRepositoryAccessMode String: Access mode for the training image repository.
trainingRepositoryAuthConfig Property Map: Authentication configuration for the training image repository. See trainingRepositoryAuthConfig below.

TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig, TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs

TrainingRepositoryCredentialsProviderArn string: ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.

TrainingRepositoryCredentialsProviderArn string: ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.

trainingRepositoryCredentialsProviderArn String: ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.

trainingRepositoryCredentialsProviderArn string: ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.

training_repository_credentials_provider_arn str: ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.

trainingRepositoryCredentialsProviderArn String: ARN of the Lambda function that provides credentials to authenticate to the private Docker registry.

TrainingJobCheckpointConfig, TrainingJobCheckpointConfigArgs

S3Uri string: S3 URI where checkpoints are stored.
LocalPath string: Local path where checkpoints are written.

S3Uri string: S3 URI where checkpoints are stored.
LocalPath string: Local path where checkpoints are written.

s3Uri String: S3 URI where checkpoints are stored.
localPath String: Local path where checkpoints are written.

s3Uri string: S3 URI where checkpoints are stored.
localPath string: Local path where checkpoints are written.

s3_uri str: S3 URI where checkpoints are stored.
local_path str: Local path where checkpoints are written.

s3Uri String: S3 URI where checkpoints are stored.
localPath String: Local path where checkpoints are written.

TrainingJobDebugHookConfig, TrainingJobDebugHookConfigArgs

S3OutputPath string: S3 URI where debug output is stored.
CollectionConfigurations List<TrainingJobDebugHookConfigCollectionConfiguration>: List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
HookParameters Dictionary<string, string>: Map of parameters for the debug hook. Maximum of 20 entries.
LocalPath string: Local path where debug output is written.

S3OutputPath string: S3 URI where debug output is stored.
CollectionConfigurations []TrainingJobDebugHookConfigCollectionConfiguration: List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
HookParameters map[string]string: Map of parameters for the debug hook. Maximum of 20 entries.
LocalPath string: Local path where debug output is written.

s3OutputPath String: S3 URI where debug output is stored.
collectionConfigurations List<TrainingJobDebugHookConfigCollectionConfiguration>: List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
hookParameters Map<String,String>: Map of parameters for the debug hook. Maximum of 20 entries.
localPath String: Local path where debug output is written.

s3OutputPath string: S3 URI where debug output is stored.
collectionConfigurations TrainingJobDebugHookConfigCollectionConfiguration[]: List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
hookParameters {[key: string]: string}: Map of parameters for the debug hook. Maximum of 20 entries.
localPath string: Local path where debug output is written.

s3_output_path str: S3 URI where debug output is stored.
collection_configurations Sequence[TrainingJobDebugHookConfigCollectionConfiguration]: List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
hook_parameters Mapping[str, str]: Map of parameters for the debug hook. Maximum of 20 entries.
local_path str: Local path where debug output is written.

s3OutputPath String: S3 URI where debug output is stored.
collectionConfigurations List<Property Map>: List of tensor collections to configure for the debug hook. Maximum of 20. See collectionConfigurations below.
hookParameters Map<String>: Map of parameters for the debug hook. Maximum of 20 entries.
localPath String: Local path where debug output is written.

TrainingJobDebugHookConfigCollectionConfiguration, TrainingJobDebugHookConfigCollectionConfigurationArgs

CollectionName string: Name of the tensor collection.
CollectionParameters Dictionary<string, string>: Map of parameters for the tensor collection.

CollectionName string: Name of the tensor collection.
CollectionParameters map[string]string: Map of parameters for the tensor collection.

collectionName String: Name of the tensor collection.
collectionParameters Map<String,String>: Map of parameters for the tensor collection.

collectionName string: Name of the tensor collection.
collectionParameters {[key: string]: string}: Map of parameters for the tensor collection.

collection_name str: Name of the tensor collection.
collection_parameters Mapping[str, str]: Map of parameters for the tensor collection.

collectionName String: Name of the tensor collection.
collectionParameters Map<String>: Map of parameters for the tensor collection.

TrainingJobDebugRuleConfiguration, TrainingJobDebugRuleConfigurationArgs

RuleConfigurationName string: Name of the rule configuration. Must be between 1 and 256 characters.
RuleEvaluatorImage string: Docker image URI for the rule evaluator.
InstanceType string: Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
LocalPath string: Local path where debug rule output is written.
RuleParameters Dictionary<string, string>: Map of parameters for the rule configuration. Maximum of 100 entries.
S3OutputPath string: S3 URI where rule output is stored.
VolumeSizeInGb int: Size of the storage volume for the rule evaluator, in GB.

RuleConfigurationName string: Name of the rule configuration. Must be between 1 and 256 characters.
RuleEvaluatorImage string: Docker image URI for the rule evaluator.
InstanceType string: Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
LocalPath string: Local path where debug rule output is written.
RuleParameters map[string]string: Map of parameters for the rule configuration. Maximum of 100 entries.
S3OutputPath string: S3 URI where rule output is stored.
VolumeSizeInGb int: Size of the storage volume for the rule evaluator, in GB.

ruleConfigurationName String: Name of the rule configuration. Must be between 1 and 256 characters.
ruleEvaluatorImage String: Docker image URI for the rule evaluator.
instanceType String: Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
localPath String: Local path where debug rule output is written.
ruleParameters Map<String,String>: Map of parameters for the rule configuration. Maximum of 100 entries.
s3OutputPath String: S3 URI where rule output is stored.
volumeSizeInGb Integer: Size of the storage volume for the rule evaluator, in GB.

ruleConfigurationName string: Name of the rule configuration. Must be between 1 and 256 characters.
ruleEvaluatorImage string: Docker image URI for the rule evaluator.
instanceType string: Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
localPath string: Local path where debug rule output is written.
ruleParameters {[key: string]: string}: Map of parameters for the rule configuration. Maximum of 100 entries.
s3OutputPath string: S3 URI where rule output is stored.
volumeSizeInGb number: Size of the storage volume for the rule evaluator, in GB.

rule_configuration_name str: Name of the rule configuration. Must be between 1 and 256 characters.
rule_evaluator_image str: Docker image URI for the rule evaluator.
instance_type str: Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
local_path str: Local path where debug rule output is written.
rule_parameters Mapping[str, str]: Map of parameters for the rule configuration. Maximum of 100 entries.
s3_output_path str: S3 URI where rule output is stored.
volume_size_in_gb int: Size of the storage volume for the rule evaluator, in GB.

ruleConfigurationName String: Name of the rule configuration. Must be between 1 and 256 characters.
ruleEvaluatorImage String: Docker image URI for the rule evaluator.
instanceType String: Instance type to deploy for the debug rule evaluation. Valid values are SageMaker AI processing instance types.
localPath String: Local path where debug rule output is written.
ruleParameters Map<String>: Map of parameters for the rule configuration. Maximum of 100 entries.
s3OutputPath String: S3 URI where rule output is stored.
volumeSizeInGb Number: Size of the storage volume for the rule evaluator, in GB.

TrainingJobExperimentConfig, TrainingJobExperimentConfigArgs

ExperimentName string: Name of the SageMaker AI Experiment to associate with.
RunName string: Name of the Experiment Run to associate with.
TrialComponentDisplayName string: Display name for the trial component.
TrialName string: Name of the SageMaker AI Trial to associate with.

ExperimentName string: Name of the SageMaker AI Experiment to associate with.
RunName string: Name of the Experiment Run to associate with.
TrialComponentDisplayName string: Display name for the trial component.
TrialName string: Name of the SageMaker AI Trial to associate with.

experimentName String: Name of the SageMaker AI Experiment to associate with.
runName String: Name of the Experiment Run to associate with.
trialComponentDisplayName String: Display name for the trial component.
trialName String: Name of the SageMaker AI Trial to associate with.

experimentName string: Name of the SageMaker AI Experiment to associate with.
runName string: Name of the Experiment Run to associate with.
trialComponentDisplayName string: Display name for the trial component.
trialName string: Name of the SageMaker AI Trial to associate with.

experiment_name str: Name of the SageMaker AI Experiment to associate with.
run_name str: Name of the Experiment Run to associate with.
trial_component_display_name str: Display name for the trial component.
trial_name str: Name of the SageMaker AI Trial to associate with.

experimentName String: Name of the SageMaker AI Experiment to associate with.
runName String: Name of the Experiment Run to associate with.
trialComponentDisplayName String: Display name for the trial component.
trialName String: Name of the SageMaker AI Trial to associate with.

TrainingJobInfraCheckConfig, TrainingJobInfraCheckConfigArgs

EnableInfraCheck bool: Whether to enable infrastructure health checks before training.

EnableInfraCheck bool: Whether to enable infrastructure health checks before training.

enableInfraCheck Boolean: Whether to enable infrastructure health checks before training.

enableInfraCheck boolean: Whether to enable infrastructure health checks before training.

enable_infra_check bool: Whether to enable infrastructure health checks before training.

enableInfraCheck Boolean: Whether to enable infrastructure health checks before training.

TrainingJobInputDataConfig, TrainingJobInputDataConfigArgs

ChannelName string: Name of the channel. Must be between 1 and 64 characters.
CompressionType string: Compression type for the input data. Valid values: None, Gzip.
ContentType string: MIME type of the input data.
DataSource TrainingJobInputDataConfigDataSource: Location of the channel data. See dataSource below.
InputMode string: Input mode for the channel data. Valid values: File, Pipe, FastFile.
RecordWrapperType string: Record wrapper type. Valid values: None, RecordIO.
ShuffleConfig TrainingJobInputDataConfigShuffleConfig: Configuration for shuffling data in the channel. See shuffleConfig below.

ChannelName string: Name of the channel. Must be between 1 and 64 characters.
CompressionType string: Compression type for the input data. Valid values: None, Gzip.
ContentType string: MIME type of the input data.
DataSource TrainingJobInputDataConfigDataSource: Location of the channel data. See dataSource below.
InputMode string: Input mode for the channel data. Valid values: File, Pipe, FastFile.
RecordWrapperType string: Record wrapper type. Valid values: None, RecordIO.
ShuffleConfig TrainingJobInputDataConfigShuffleConfig: Configuration for shuffling data in the channel. See shuffleConfig below.

channelName String: Name of the channel. Must be between 1 and 64 characters.
compressionType String: Compression type for the input data. Valid values: None, Gzip.
contentType String: MIME type of the input data.
dataSource TrainingJobInputDataConfigDataSource: Location of the channel data. See dataSource below.
inputMode String: Input mode for the channel data. Valid values: File, Pipe, FastFile.
recordWrapperType String: Record wrapper type. Valid values: None, RecordIO.
shuffleConfig TrainingJobInputDataConfigShuffleConfig: Configuration for shuffling data in the channel. See shuffleConfig below.

channelName string: Name of the channel. Must be between 1 and 64 characters.
compressionType string: Compression type for the input data. Valid values: None, Gzip.
contentType string: MIME type of the input data.
dataSource TrainingJobInputDataConfigDataSource: Location of the channel data. See dataSource below.
inputMode string: Input mode for the channel data. Valid values: File, Pipe, FastFile.
recordWrapperType string: Record wrapper type. Valid values: None, RecordIO.
shuffleConfig TrainingJobInputDataConfigShuffleConfig: Configuration for shuffling data in the channel. See shuffleConfig below.

channel_name str: Name of the channel. Must be between 1 and 64 characters.
compression_type str: Compression type for the input data. Valid values: None, Gzip.
content_type str: MIME type of the input data.
data_source TrainingJobInputDataConfigDataSource: Location of the channel data. See dataSource below.
input_mode str: Input mode for the channel data. Valid values: File, Pipe, FastFile.
record_wrapper_type str: Record wrapper type. Valid values: None, RecordIO.
shuffle_config TrainingJobInputDataConfigShuffleConfig: Configuration for shuffling data in the channel. See shuffleConfig below.

channelName String: Name of the channel. Must be between 1 and 64 characters.
compressionType String: Compression type for the input data. Valid values: None, Gzip.
contentType String: MIME type of the input data.
dataSource Property Map: Location of the channel data. See dataSource below.
inputMode String: Input mode for the channel data. Valid values: File, Pipe, FastFile.
recordWrapperType String: Record wrapper type. Valid values: None, RecordIO.
shuffleConfig Property Map: Configuration for shuffling data in the channel. See shuffleConfig below.

TrainingJobInputDataConfigDataSource, TrainingJobInputDataConfigDataSourceArgs

FileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource: File system data source. See fileSystemDataSource below.
S3DataSource TrainingJobInputDataConfigDataSourceS3DataSource: S3 data source. See s3DataSource below.

FileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource: File system data source. See fileSystemDataSource below.
S3DataSource TrainingJobInputDataConfigDataSourceS3DataSource: S3 data source. See s3DataSource below.

fileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource: File system data source. See fileSystemDataSource below.
s3DataSource TrainingJobInputDataConfigDataSourceS3DataSource: S3 data source. See s3DataSource below.

fileSystemDataSource TrainingJobInputDataConfigDataSourceFileSystemDataSource: File system data source. See fileSystemDataSource below.
s3DataSource TrainingJobInputDataConfigDataSourceS3DataSource: S3 data source. See s3DataSource below.

file_system_data_source TrainingJobInputDataConfigDataSourceFileSystemDataSource: File system data source. See fileSystemDataSource below.
s3_data_source TrainingJobInputDataConfigDataSourceS3DataSource: S3 data source. See s3DataSource below.

fileSystemDataSource Property Map: File system data source. See fileSystemDataSource below.
s3DataSource Property Map: S3 data source. See s3DataSource below.

TrainingJobInputDataConfigDataSourceFileSystemDataSource, TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs

DirectoryPath string: Full path to the directory on the file system.
FileSystemAccessMode string: Access mode for the file system. Valid values: ro, rw.
FileSystemId string: File system ID.
FileSystemType string: File system type. Valid values: EFS, FSxLustre.

DirectoryPath string: Full path to the directory on the file system.
FileSystemAccessMode string: Access mode for the file system. Valid values: ro, rw.
FileSystemId string: File system ID.
FileSystemType string: File system type. Valid values: EFS, FSxLustre.

directoryPath String: Full path to the directory on the file system.
fileSystemAccessMode String: Access mode for the file system. Valid values: ro, rw.
fileSystemId String: File system ID.
fileSystemType String: File system type. Valid values: EFS, FSxLustre.

directoryPath string: Full path to the directory on the file system.
fileSystemAccessMode string: Access mode for the file system. Valid values: ro, rw.
fileSystemId string: File system ID.
fileSystemType string: File system type. Valid values: EFS, FSxLustre.

directory_path str: Full path to the directory on the file system.
file_system_access_mode str: Access mode for the file system. Valid values: ro, rw.
file_system_id str: File system ID.
file_system_type str: File system type. Valid values: EFS, FSxLustre.

directoryPath String: Full path to the directory on the file system.
fileSystemAccessMode String: Access mode for the file system. Valid values: ro, rw.
fileSystemId String: File system ID.
fileSystemType String: File system type. Valid values: EFS, FSxLustre.

TrainingJobInputDataConfigDataSourceS3DataSource, TrainingJobInputDataConfigDataSourceS3DataSourceArgs

S3DataType string: S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
S3Uri string: S3 URI of the data.
AttributeNames List<string>: List of attribute names to include in the training dataset. Maximum of 16.
HubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig: SageMaker AI Hub access configuration. See hubAccessConfig below.
InstanceGroupNames List<string>: List of instance group names for the training data distribution. Maximum of 5.
ModelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig: Model access configuration. See modelAccessConfig below.
S3DataDistributionType string: Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.

S3DataType string: S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
S3Uri string: S3 URI of the data.
AttributeNames []string: List of attribute names to include in the training dataset. Maximum of 16.
HubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig: SageMaker AI Hub access configuration. See hubAccessConfig below.
InstanceGroupNames []string: List of instance group names for the training data distribution. Maximum of 5.
ModelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig: Model access configuration. See modelAccessConfig below.
S3DataDistributionType string: Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.

s3DataType String: S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
s3Uri String: S3 URI of the data.
attributeNames List<String>: List of attribute names to include in the training dataset. Maximum of 16.
hubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig: SageMaker AI Hub access configuration. See hubAccessConfig below.
instanceGroupNames List<String>: List of instance group names for the training data distribution. Maximum of 5.
modelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig: Model access configuration. See modelAccessConfig below.
s3DataDistributionType String: Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.

s3DataType string: S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
s3Uri string: S3 URI of the data.
attributeNames string[]: List of attribute names to include in the training dataset. Maximum of 16.
hubAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig: SageMaker AI Hub access configuration. See hubAccessConfig below.
instanceGroupNames string[]: List of instance group names for the training data distribution. Maximum of 5.
modelAccessConfig TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig: Model access configuration. See modelAccessConfig below.
s3DataDistributionType string: Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.

s3_data_type str: S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
s3_uri str: S3 URI of the data.
attribute_names Sequence[str]: List of attribute names to include in the training dataset. Maximum of 16.
hub_access_config TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig: SageMaker AI Hub access configuration. See hub_access_config below.
instance_group_names Sequence[str]: List of instance group names for the training data distribution. Maximum of 5.
model_access_config TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig: Model access configuration. See model_access_config below.
s3_data_distribution_type str: Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.

s3DataType String: S3 data type. Valid values: ManifestFile, S3Prefix, AugmentedManifestFile.
s3Uri String: S3 URI of the data.
attributeNames List<String>: List of attribute names to include in the training dataset. Maximum of 16.
hubAccessConfig Property Map: SageMaker AI Hub access configuration. See hubAccessConfig below.
instanceGroupNames List<String>: List of instance group names for the training data distribution. Maximum of 5.
modelAccessConfig Property Map: Model access configuration. See modelAccessConfig below.
s3DataDistributionType String: Distribution type for S3 data. Valid values: FullyReplicated, ShardedByS3Key.

TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig, TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs

HubContentArn string: ARN of the hub content.

HubContentArn string: ARN of the hub content.

hubContentArn String: ARN of the hub content.

hubContentArn string: ARN of the hub content.

hub_content_arn str: ARN of the hub content.

hubContentArn String: ARN of the hub content.

TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig, TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs

AcceptEula bool: Whether to accept the model EULA.

AcceptEula bool: Whether to accept the model EULA.

acceptEula Boolean: Whether to accept the model EULA.

acceptEula boolean: Whether to accept the model EULA.

accept_eula bool: Whether to accept the model EULA.

acceptEula Boolean: Whether to accept the model EULA.

TrainingJobInputDataConfigShuffleConfig, TrainingJobInputDataConfigShuffleConfigArgs

Seed int: Seed value used to shuffle the training data.

Seed int: Seed value used to shuffle the training data.

seed Integer: Seed value used to shuffle the training data.

seed number: Seed value used to shuffle the training data.

seed int: Seed value used to shuffle the training data.

seed Number: Seed value used to shuffle the training data.

TrainingJobMlflowConfig, TrainingJobMlflowConfigArgs

MlflowResourceArn string: ARN of the MLflow tracking server.
MlflowExperimentName string: Name of the MLflow experiment.
MlflowRunName string: Name of the MLflow run.

MlflowResourceArn string: ARN of the MLflow tracking server.
MlflowExperimentName string: Name of the MLflow experiment.
MlflowRunName string: Name of the MLflow run.

mlflowResourceArn String: ARN of the MLflow tracking server.
mlflowExperimentName String: Name of the MLflow experiment.
mlflowRunName String: Name of the MLflow run.

mlflowResourceArn string: ARN of the MLflow tracking server.
mlflowExperimentName string: Name of the MLflow experiment.
mlflowRunName string: Name of the MLflow run.

mlflow_resource_arn str: ARN of the MLflow tracking server.
mlflow_experiment_name str: Name of the MLflow experiment.
mlflow_run_name str: Name of the MLflow run.

mlflowResourceArn String: ARN of the MLflow tracking server.
mlflowExperimentName String: Name of the MLflow experiment.
mlflowRunName String: Name of the MLflow run.

TrainingJobModelPackageConfig, TrainingJobModelPackageConfigArgs

ModelPackageGroupArn string: ARN of the model package group.
SourceModelPackageArn string: ARN of the source model package.

ModelPackageGroupArn string: ARN of the model package group.
SourceModelPackageArn string: ARN of the source model package.

modelPackageGroupArn String: ARN of the model package group.
sourceModelPackageArn String: ARN of the source model package.

modelPackageGroupArn string: ARN of the model package group.
sourceModelPackageArn string: ARN of the source model package.

model_package_group_arn str: ARN of the model package group.
source_model_package_arn str: ARN of the source model package.

modelPackageGroupArn String: ARN of the model package group.
sourceModelPackageArn String: ARN of the source model package.

TrainingJobOutputDataConfig, TrainingJobOutputDataConfigArgs

S3OutputPath string: S3 URI where output data is stored.
CompressionType string: Output compression type. Valid values: GZIP, NONE.
KmsKeyId string: KMS key ID used to encrypt the output data.

S3OutputPath string: S3 URI where output data is stored.
CompressionType string: Output compression type. Valid values: GZIP, NONE.
KmsKeyId string: KMS key ID used to encrypt the output data.

s3OutputPath String: S3 URI where output data is stored.
compressionType String: Output compression type. Valid values: GZIP, NONE.
kmsKeyId String: KMS key ID used to encrypt the output data.

s3OutputPath string: S3 URI where output data is stored.
compressionType string: Output compression type. Valid values: GZIP, NONE.
kmsKeyId string: KMS key ID used to encrypt the output data.

s3_output_path str: S3 URI where output data is stored.
compression_type str: Output compression type. Valid values: GZIP, NONE.
kms_key_id str: KMS key ID used to encrypt the output data.

s3OutputPath String: S3 URI where output data is stored.
compressionType String: Output compression type. Valid values: GZIP, NONE.
kmsKeyId String: KMS key ID used to encrypt the output data.

TrainingJobProfilerConfig, TrainingJobProfilerConfigArgs

DisableProfiler bool: Whether to disable the profiler.
ProfilingIntervalInMilliseconds int: Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
ProfilingParameters Dictionary<string, string>: Map of profiling parameters. Maximum of 20 entries.
S3OutputPath string: S3 URI where profiler output is stored.

DisableProfiler bool: Whether to disable the profiler.
ProfilingIntervalInMilliseconds int: Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
ProfilingParameters map[string]string: Map of profiling parameters. Maximum of 20 entries.
S3OutputPath string: S3 URI where profiler output is stored.

disableProfiler Boolean: Whether to disable the profiler.
profilingIntervalInMilliseconds Integer: Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
profilingParameters Map<String,String>: Map of profiling parameters. Maximum of 20 entries.
s3OutputPath String: S3 URI where profiler output is stored.

disableProfiler boolean: Whether to disable the profiler.
profilingIntervalInMilliseconds number: Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
profilingParameters {[key: string]: string}: Map of profiling parameters. Maximum of 20 entries.
s3OutputPath string: S3 URI where profiler output is stored.

disable_profiler bool: Whether to disable the profiler.
profiling_interval_in_milliseconds int: Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
profiling_parameters Mapping[str, str]: Map of profiling parameters. Maximum of 20 entries.
s3_output_path str: S3 URI where profiler output is stored.

disableProfiler Boolean: Whether to disable the profiler.
profilingIntervalInMilliseconds Number: Time interval in milliseconds for capturing system metrics. Valid values: 100, 200, 500, 1000, 5000, 60000.
profilingParameters Map<String>: Map of profiling parameters. Maximum of 20 entries.
s3OutputPath String: S3 URI where profiler output is stored.

TrainingJobProfilerRuleConfiguration, TrainingJobProfilerRuleConfigurationArgs

RuleConfigurationName string: Name of the profiler rule configuration. Must be between 1 and 256 characters.
RuleEvaluatorImage string: Docker image URI for the profiler rule evaluator.
InstanceType string: Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
LocalPath string: Local path where profiler rule output is written.
RuleParameters Dictionary<string, string>: Map of parameters for the profiler rule. Maximum of 100 entries.
S3OutputPath string: S3 URI where profiler rule output is stored.
VolumeSizeInGb int: Size of the storage volume for the profiler rule evaluator, in GB.

RuleConfigurationName string: Name of the profiler rule configuration. Must be between 1 and 256 characters.
RuleEvaluatorImage string: Docker image URI for the profiler rule evaluator.
InstanceType string: Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
LocalPath string: Local path where profiler rule output is written.
RuleParameters map[string]string: Map of parameters for the profiler rule. Maximum of 100 entries.
S3OutputPath string: S3 URI where profiler rule output is stored.
VolumeSizeInGb int: Size of the storage volume for the profiler rule evaluator, in GB.

ruleConfigurationName String: Name of the profiler rule configuration. Must be between 1 and 256 characters.
ruleEvaluatorImage String: Docker image URI for the profiler rule evaluator.
instanceType String: Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
localPath String: Local path where profiler rule output is written.
ruleParameters Map<String,String>: Map of parameters for the profiler rule. Maximum of 100 entries.
s3OutputPath String: S3 URI where profiler rule output is stored.
volumeSizeInGb Integer: Size of the storage volume for the profiler rule evaluator, in GB.

ruleConfigurationName string: Name of the profiler rule configuration. Must be between 1 and 256 characters.
ruleEvaluatorImage string: Docker image URI for the profiler rule evaluator.
instanceType string: Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
localPath string: Local path where profiler rule output is written.
ruleParameters {[key: string]: string}: Map of parameters for the profiler rule. Maximum of 100 entries.
s3OutputPath string: S3 URI where profiler rule output is stored.
volumeSizeInGb number: Size of the storage volume for the profiler rule evaluator, in GB.

rule_configuration_name str: Name of the profiler rule configuration. Must be between 1 and 256 characters.
rule_evaluator_image str: Docker image URI for the profiler rule evaluator.
instance_type str: Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
local_path str: Local path where profiler rule output is written.
rule_parameters Mapping[str, str]: Map of parameters for the profiler rule. Maximum of 100 entries.
s3_output_path str: S3 URI where profiler rule output is stored.
volume_size_in_gb int: Size of the storage volume for the profiler rule evaluator, in GB.

ruleConfigurationName String: Name of the profiler rule configuration. Must be between 1 and 256 characters.
ruleEvaluatorImage String: Docker image URI for the profiler rule evaluator.
instanceType String: Instance type to deploy for the profiler rule evaluation. Valid values are SageMaker AI processing instance types.
localPath String: Local path where profiler rule output is written.
ruleParameters Map<String>: Map of parameters for the profiler rule. Maximum of 100 entries.
s3OutputPath String: S3 URI where profiler rule output is stored.
volumeSizeInGb Number: Size of the storage volume for the profiler rule evaluator, in GB.

TrainingJobRemoteDebugConfig, TrainingJobRemoteDebugConfigArgs

EnableRemoteDebug bool: Whether to enable remote debugging for the training job.

EnableRemoteDebug bool: Whether to enable remote debugging for the training job.

enableRemoteDebug Boolean: Whether to enable remote debugging for the training job.

enableRemoteDebug boolean: Whether to enable remote debugging for the training job.

enable_remote_debug bool: Whether to enable remote debugging for the training job.

enableRemoteDebug Boolean: Whether to enable remote debugging for the training job.

TrainingJobResourceConfig, TrainingJobResourceConfigArgs

InstanceCount int: Number of ML compute instances to use. Conflicts with InstanceGroups.
InstanceGroups List<TrainingJobResourceConfigInstanceGroup>: List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with InstanceCount, InstanceType, and KeepAlivePeriodInSeconds. See InstanceGroups below.
InstancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig: Instance placement configuration. See InstancePlacementConfig below.
InstanceType string: ML compute instance type. Conflicts with InstanceGroups.
KeepAlivePeriodInSeconds int: Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with InstanceGroups.
TrainingPlanArn string: ARN of the training plan to use.
VolumeKmsKeyId string: KMS key ID used to encrypt data on the storage volume.
VolumeSizeInGb int: Size of the storage volume attached to each instance, in GB.

InstanceCount int: Number of ML compute instances to use. Conflicts with InstanceGroups.
InstanceGroups []TrainingJobResourceConfigInstanceGroup: List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with InstanceCount, InstanceType, and KeepAlivePeriodInSeconds. See InstanceGroups below.
InstancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig: Instance placement configuration. See InstancePlacementConfig below.
InstanceType string: ML compute instance type. Conflicts with InstanceGroups.
KeepAlivePeriodInSeconds int: Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with InstanceGroups.
TrainingPlanArn string: ARN of the training plan to use.
VolumeKmsKeyId string: KMS key ID used to encrypt data on the storage volume.
VolumeSizeInGb int: Size of the storage volume attached to each instance, in GB.

instanceCount Integer: Number of ML compute instances to use. Conflicts with instanceGroups.
instanceGroups List<TrainingJobResourceConfigInstanceGroup>: List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
instancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig: Instance placement configuration. See instancePlacementConfig below.
instanceType String: ML compute instance type. Conflicts with instanceGroups.
keepAlivePeriodInSeconds Integer: Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
trainingPlanArn String: ARN of the training plan to use.
volumeKmsKeyId String: KMS key ID used to encrypt data on the storage volume.
volumeSizeInGb Integer: Size of the storage volume attached to each instance, in GB.

instanceCount number: Number of ML compute instances to use. Conflicts with instanceGroups.
instanceGroups TrainingJobResourceConfigInstanceGroup[]: List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
instancePlacementConfig TrainingJobResourceConfigInstancePlacementConfig: Instance placement configuration. See instancePlacementConfig below.
instanceType string: ML compute instance type. Conflicts with instanceGroups.
keepAlivePeriodInSeconds number: Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
trainingPlanArn string: ARN of the training plan to use.
volumeKmsKeyId string: KMS key ID used to encrypt data on the storage volume.
volumeSizeInGb number: Size of the storage volume attached to each instance, in GB.

instance_count int: Number of ML compute instances to use. Conflicts with instance_groups.
instance_groups Sequence[TrainingJobResourceConfigInstanceGroup]: List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instance_count, instance_type, and keep_alive_period_in_seconds. See instance_groups below.
instance_placement_config TrainingJobResourceConfigInstancePlacementConfig: Instance placement configuration. See instance_placement_config below.
instance_type str: ML compute instance type. Conflicts with instance_groups.
keep_alive_period_in_seconds int: Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instance_groups.
training_plan_arn str: ARN of the training plan to use.
volume_kms_key_id str: KMS key ID used to encrypt data on the storage volume.
volume_size_in_gb int: Size of the storage volume attached to each instance, in GB.

instanceCount Number: Number of ML compute instances to use. Conflicts with instanceGroups.
instanceGroups List<Property Map>: List of instance groups for heterogeneous cluster training. Maximum of 5. Conflicts with instanceCount, instanceType, and keepAlivePeriodInSeconds. See instanceGroups below.
instancePlacementConfig Property Map: Instance placement configuration. See instancePlacementConfig below.
instanceType String: ML compute instance type. Conflicts with instanceGroups.
keepAlivePeriodInSeconds Number: Time in seconds to keep instances alive after training completes, for warm pool reuse. Valid values: 0–3600. Conflicts with instanceGroups.
trainingPlanArn String: ARN of the training plan to use.
volumeKmsKeyId String: KMS key ID used to encrypt data on the storage volume.
volumeSizeInGb Number: Size of the storage volume attached to each instance, in GB.

TrainingJobResourceConfigInstanceGroup, TrainingJobResourceConfigInstanceGroupArgs

InstanceCount int: Number of instances in the group.
InstanceGroupName string: Name of the instance group.
InstanceType string: ML compute instance type for the group.

InstanceCount int: Number of instances in the group.
InstanceGroupName string: Name of the instance group.
InstanceType string: ML compute instance type for the group.

instanceCount Integer: Number of instances in the group.
instanceGroupName String: Name of the instance group.
instanceType String: ML compute instance type for the group.

instanceCount number: Number of instances in the group.
instanceGroupName string: Name of the instance group.
instanceType string: ML compute instance type for the group.

instance_count int: Number of instances in the group.
instance_group_name str: Name of the instance group.
instance_type str: ML compute instance type for the group.

instanceCount Number: Number of instances in the group.
instanceGroupName String: Name of the instance group.
instanceType String: ML compute instance type for the group.

TrainingJobResourceConfigInstancePlacementConfig, TrainingJobResourceConfigInstancePlacementConfigArgs

EnableMultipleJobs bool: Whether to enable multiple jobs on the same instance.
PlacementSpecifications List<TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification>: Placement specifications for instance placement. See placementSpecifications below.

EnableMultipleJobs bool: Whether to enable multiple jobs on the same instance.
PlacementSpecifications []TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification: Placement specifications for instance placement. See placementSpecifications below.

enableMultipleJobs Boolean: Whether to enable multiple jobs on the same instance.
placementSpecifications List<TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification>: Placement specifications for instance placement. See placementSpecifications below.

enableMultipleJobs boolean: Whether to enable multiple jobs on the same instance.
placementSpecifications TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification[]: Placement specifications for instance placement. See placementSpecifications below.

enable_multiple_jobs bool: Whether to enable multiple jobs on the same instance.
placement_specifications Sequence[TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification]: Placement specifications for instance placement. See placement_specifications below.

enableMultipleJobs Boolean: Whether to enable multiple jobs on the same instance.
placementSpecifications List<Property Map>: Placement specifications for instance placement. See placementSpecifications below.

TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification, TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs

InstanceCount int: Number of instances in the placement.
UltraServerId string: Ultra server ID for the placement.

InstanceCount int: Number of instances in the placement.
UltraServerId string: Ultra server ID for the placement.

instanceCount Integer: Number of instances in the placement.
ultraServerId String: Ultra server ID for the placement.

instanceCount number: Number of instances in the placement.
ultraServerId string: Ultra server ID for the placement.

instance_count int: Number of instances in the placement.
ultra_server_id str: Ultra server ID for the placement.

instanceCount Number: Number of instances in the placement.
ultraServerId String: Ultra server ID for the placement.

TrainingJobRetryStrategy, TrainingJobRetryStrategyArgs

MaximumRetryAttempts int: Maximum number of retry attempts. Valid values: 1–30.

MaximumRetryAttempts int: Maximum number of retry attempts. Valid values: 1–30.

maximumRetryAttempts Integer: Maximum number of retry attempts. Valid values: 1–30.

maximumRetryAttempts number: Maximum number of retry attempts. Valid values: 1–30.

maximum_retry_attempts int: Maximum number of retry attempts. Valid values: 1–30.

maximumRetryAttempts Number: Maximum number of retry attempts. Valid values: 1–30.

TrainingJobServerlessJobConfig, TrainingJobServerlessJobConfigArgs

BaseModelArn string: ARN of the base foundation model from the SageMaker AI Public Hub.
JobType string: Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
AcceptEula bool: Whether to accept the model EULA.
CustomizationTechnique string: Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
EvaluationType string: Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
EvaluatorArn string: ARN of the evaluator.
Peft string: Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.

BaseModelArn string: ARN of the base foundation model from the SageMaker AI Public Hub.
JobType string: Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
AcceptEula bool: Whether to accept the model EULA.
CustomizationTechnique string: Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
EvaluationType string: Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
EvaluatorArn string: ARN of the evaluator.
Peft string: Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.

baseModelArn String: ARN of the base foundation model from the SageMaker AI Public Hub.
jobType String: Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
acceptEula Boolean: Whether to accept the model EULA.
customizationTechnique String: Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
evaluationType String: Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
evaluatorArn String: ARN of the evaluator.
peft String: Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.

baseModelArn string: ARN of the base foundation model from the SageMaker AI Public Hub.
jobType string: Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
acceptEula boolean: Whether to accept the model EULA.
customizationTechnique string: Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
evaluationType string: Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
evaluatorArn string: ARN of the evaluator.
peft string: Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.

base_model_arn str: ARN of the base foundation model from the SageMaker AI Public Hub.
job_type str: Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
accept_eula bool: Whether to accept the model EULA.
customization_technique str: Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
evaluation_type str: Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
evaluator_arn str: ARN of the evaluator.
peft str: Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.

baseModelArn String: ARN of the base foundation model from the SageMaker AI Public Hub.
jobType String: Serverless job type. Valid values: FINE_TUNING, EVALUATION, DISTILLATION.
acceptEula Boolean: Whether to accept the model EULA.
customizationTechnique String: Customization technique to apply. Valid values: FINE_TUNING, DOMAIN_ADAPTION.
evaluationType String: Evaluation type. Valid values: AUTOMATIC, HUMAN, NONE.
evaluatorArn String: ARN of the evaluator.
peft String: Parameter-Efficient Fine-Tuning (PEFT) method. Valid values: LORA.

TrainingJobSessionChainingConfig, TrainingJobSessionChainingConfigArgs

EnableSessionTagChaining bool: Whether to enable session tag chaining for the training job.

EnableSessionTagChaining bool: Whether to enable session tag chaining for the training job.

enableSessionTagChaining Boolean: Whether to enable session tag chaining for the training job.

enableSessionTagChaining boolean: Whether to enable session tag chaining for the training job.

enable_session_tag_chaining bool: Whether to enable session tag chaining for the training job.

enableSessionTagChaining Boolean: Whether to enable session tag chaining for the training job.

TrainingJobStoppingCondition, TrainingJobStoppingConditionArgs

MaxPendingTimeInSeconds int: Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
MaxRuntimeInSeconds int: Maximum time in seconds the training job can run before it is stopped.
MaxWaitTimeInSeconds int: Maximum time in seconds to wait for a managed spot training job to complete.

MaxPendingTimeInSeconds int: Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
MaxRuntimeInSeconds int: Maximum time in seconds the training job can run before it is stopped.
MaxWaitTimeInSeconds int: Maximum time in seconds to wait for a managed spot training job to complete.

maxPendingTimeInSeconds Integer: Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
maxRuntimeInSeconds Integer: Maximum time in seconds the training job can run before it is stopped.
maxWaitTimeInSeconds Integer: Maximum time in seconds to wait for a managed spot training job to complete.

maxPendingTimeInSeconds number: Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
maxRuntimeInSeconds number: Maximum time in seconds the training job can run before it is stopped.
maxWaitTimeInSeconds number: Maximum time in seconds to wait for a managed spot training job to complete.

max_pending_time_in_seconds int: Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
max_runtime_in_seconds int: Maximum time in seconds the training job can run before it is stopped.
max_wait_time_in_seconds int: Maximum time in seconds to wait for a managed spot training job to complete.

maxPendingTimeInSeconds Number: Maximum time in seconds a training job can be pending before it is stopped. Valid values: 7200–2419200.
maxRuntimeInSeconds Number: Maximum time in seconds the training job can run before it is stopped.
maxWaitTimeInSeconds Number: Maximum time in seconds to wait for a managed spot training job to complete.

TrainingJobTensorBoardOutputConfig, TrainingJobTensorBoardOutputConfigArgs

S3OutputPath string: S3 URI where TensorBoard output is stored.
LocalPath string: Local path where TensorBoard output is written.

S3OutputPath string: S3 URI where TensorBoard output is stored.
LocalPath string: Local path where TensorBoard output is written.

s3OutputPath String: S3 URI where TensorBoard output is stored.
localPath String: Local path where TensorBoard output is written.

s3OutputPath string: S3 URI where TensorBoard output is stored.
localPath string: Local path where TensorBoard output is written.

s3_output_path str: S3 URI where TensorBoard output is stored.
local_path str: Local path where TensorBoard output is written.

s3OutputPath String: S3 URI where TensorBoard output is stored.
localPath String: Local path where TensorBoard output is written.

TrainingJobTimeouts, TrainingJobTimeoutsArgs

Create string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
Delete string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
Update string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

Create string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
Delete string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
Update string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

create String: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
delete String: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
update String: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

create string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
delete string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
update string: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

create str: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
delete str: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
update str: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

create String: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
delete String: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours). Setting a timeout for a Delete operation is only applicable if changes are saved into state before the destroy operation occurs.
update String: A string that can be parsed as a duration consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

TrainingJobVpcConfig, TrainingJobVpcConfigArgs

SecurityGroupIds List<string>: List of VPC security group IDs. Maximum of 5.
Subnets List<string>: List of subnet IDs. Maximum of 16.

SecurityGroupIds []string: List of VPC security group IDs. Maximum of 5.
Subnets []string: List of subnet IDs. Maximum of 16.

securityGroupIds List<String>: List of VPC security group IDs. Maximum of 5.
subnets List<String>: List of subnet IDs. Maximum of 16.

securityGroupIds string[]: List of VPC security group IDs. Maximum of 5.
subnets string[]: List of subnet IDs. Maximum of 16.

security_group_ids Sequence[str]: List of VPC security group IDs. Maximum of 5.
subnets Sequence[str]: List of subnet IDs. Maximum of 16.

securityGroupIds List<String>: List of VPC security group IDs. Maximum of 5.
subnets List<String>: List of subnet IDs. Maximum of 16.

Import

Identity Schema

Required

trainingJobName (String) Name of the Training Job.

Optional

accountId (String) AWS Account where this resource is managed.
region (String) Region where this resource is managed.

Using pulumi import, import a SageMaker AI Training Job using its trainingJobName. For example:

$ pulumi import aws:sagemaker/trainingJob:TrainingJob example my-training-job

To learn more about importing existing cloud resources, see Importing resources.

Package Details

Repository: AWS Classic pulumi/pulumi-aws
License: Apache-2.0
Notes: This Pulumi package is based on the aws Terraform Provider.

Viewing docs for AWS v7.28.0
published on Thursday, Apr 30, 2026 by Pulumi

Schema (JSON)

pulumi/pulumi-aws

aws.sagemaker.TrainingJob

On this page

On this page

Example Usage

Basic Usage

With VPC Configuration

With Input Data and Hyperparameters

With Encrypted Output, Checkpoints, and TensorBoard

With Managed Spot Training and Custom Metrics

With Multiple Input Channels, Infrastructure Checks, and Session Tag Chaining

Create TrainingJob Resource

Constructor syntax

Parameters

Constructor example

TrainingJob Resource Properties

Inputs

Outputs

Look up Existing TrainingJob Resource

Supporting Types

TrainingJobAlgorithmSpecification, TrainingJobAlgorithmSpecificationArgs

TrainingJobAlgorithmSpecificationMetricDefinition, TrainingJobAlgorithmSpecificationMetricDefinitionArgs

TrainingJobAlgorithmSpecificationTrainingImageConfig, TrainingJobAlgorithmSpecificationTrainingImageConfigArgs

TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfig, TrainingJobAlgorithmSpecificationTrainingImageConfigTrainingRepositoryAuthConfigArgs

TrainingJobCheckpointConfig, TrainingJobCheckpointConfigArgs

TrainingJobDebugHookConfig, TrainingJobDebugHookConfigArgs

TrainingJobDebugHookConfigCollectionConfiguration, TrainingJobDebugHookConfigCollectionConfigurationArgs

TrainingJobDebugRuleConfiguration, TrainingJobDebugRuleConfigurationArgs

TrainingJobExperimentConfig, TrainingJobExperimentConfigArgs

TrainingJobInfraCheckConfig, TrainingJobInfraCheckConfigArgs

TrainingJobInputDataConfig, TrainingJobInputDataConfigArgs

TrainingJobInputDataConfigDataSource, TrainingJobInputDataConfigDataSourceArgs

TrainingJobInputDataConfigDataSourceFileSystemDataSource, TrainingJobInputDataConfigDataSourceFileSystemDataSourceArgs

TrainingJobInputDataConfigDataSourceS3DataSource, TrainingJobInputDataConfigDataSourceS3DataSourceArgs

TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfig, TrainingJobInputDataConfigDataSourceS3DataSourceHubAccessConfigArgs

TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfig, TrainingJobInputDataConfigDataSourceS3DataSourceModelAccessConfigArgs

TrainingJobInputDataConfigShuffleConfig, TrainingJobInputDataConfigShuffleConfigArgs

TrainingJobMlflowConfig, TrainingJobMlflowConfigArgs

TrainingJobModelPackageConfig, TrainingJobModelPackageConfigArgs

TrainingJobOutputDataConfig, TrainingJobOutputDataConfigArgs

TrainingJobProfilerConfig, TrainingJobProfilerConfigArgs

TrainingJobProfilerRuleConfiguration, TrainingJobProfilerRuleConfigurationArgs

TrainingJobRemoteDebugConfig, TrainingJobRemoteDebugConfigArgs

TrainingJobResourceConfig, TrainingJobResourceConfigArgs

TrainingJobResourceConfigInstanceGroup, TrainingJobResourceConfigInstanceGroupArgs

TrainingJobResourceConfigInstancePlacementConfig, TrainingJobResourceConfigInstancePlacementConfigArgs

TrainingJobResourceConfigInstancePlacementConfigPlacementSpecification, TrainingJobResourceConfigInstancePlacementConfigPlacementSpecificationArgs

TrainingJobRetryStrategy, TrainingJobRetryStrategyArgs

TrainingJobServerlessJobConfig, TrainingJobServerlessJobConfigArgs

TrainingJobSessionChainingConfig, TrainingJobSessionChainingConfigArgs

TrainingJobStoppingCondition, TrainingJobStoppingConditionArgs

TrainingJobTensorBoardOutputConfig, TrainingJobTensorBoardOutputConfigArgs

TrainingJobTimeouts, TrainingJobTimeoutsArgs

TrainingJobVpcConfig, TrainingJobVpcConfigArgs

Import

Identity Schema

Required

Optional

Package Details

On this page

On this page