Infrastructure as Code 모범 사례 가이드 2026
서론
Infrastructure as Code(IaC)는 현대 DevOps 환경에서 필수 요소가 되었습니다. 2026년 현재, 클라우드 네이티브 환경의 복잡성이 증가하면서 효율적인 인프라 관리를 위한 체계적인 접근이 더욱 중요해졌습니다.
1. IaC 도구 비교 분석
1.1 Terraform vs Pulumi vs AWS CDK
# Terraform - HCL syntax
# Web server instance. Instance type and environment come from input variables;
# the AMI, security group, subnet and database are defined elsewhere.
resource "aws_instance" "web" {
  ami                    = data.aws_ami.ubuntu.id
  instance_type          = var.instance_type
  vpc_security_group_ids = [aws_security_group.web.id]
  subnet_id              = aws_subnet.public.id

  # The AWS provider's RDS resource type is aws_db_instance (aws_rds_instance
  # does not exist), so the endpoint is read from aws_db_instance.main.
  user_data = templatefile("${path.module}/user_data.sh", {
    database_url = aws_db_instance.main.endpoint
  })

  tags = {
    Name        = "web-server"
    Environment = var.environment
  }
}
// Pulumi - TypeScript
import * as aws from "@pulumi/aws";
import * as pulumi from "@pulumi/pulumi";
const webServer = new aws.ec2.Instance("web", {
ami: ubuntu.then(ami => ami.id),
instanceType: config.require("instanceType"),
vpcSecurityGroupIds: [webSecurityGroup.id],
subnetId: publicSubnet.id,
userData: pulumi.interpolate`#!/bin/bash
echo "Database URL: ${database.endpoint}" > /etc/database.conf
`,
tags: {
Name: "web-server",
Environment: environment,
},
});
// AWS CDK - TypeScript
// CDK v2 ships all stable constructs in the single `aws-cdk-lib` package;
// the per-service `@aws-cdk/aws-*` packages belong to the retired v1 line.
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as rds from 'aws-cdk-lib/aws-rds';
// `this`, `vpc`, `webSecurityGroup` and `database` are expected to come from
// the enclosing Stack, which is not part of this snippet.
const instance = new ec2.Instance(this, 'WebServer', {
  instanceType: ec2.InstanceType.of(
    ec2.InstanceClass.T3,
    ec2.InstanceSize.MICRO
  ),
  // latestAmazonLinux() is deprecated; request Amazon Linux 2023 explicitly.
  machineImage: ec2.MachineImage.latestAmazonLinux2023(),
  vpc: vpc,
  securityGroup: webSecurityGroup,
  // UserData.custom() passes the text through verbatim, so the script has to
  // begin with a shebang on its very first line to be run as a shell script.
  userData: ec2.UserData.custom(`#!/bin/bash
echo "Database URL: ${database.instanceEndpoint.socketAddress}" > /etc/database.conf
`),
});
1.2 도구별 특징 비교
| 특징 | Terraform | Pulumi | AWS CDK |
|---|---|---|---|
| 언어 지원 | HCL | Python, TypeScript, Go, C# | TypeScript, Python, Java, C# |
| 상태 관리 | 외부 상태 파일 | 외부 상태 파일 | CloudFormation 스택 |
| 프로바이더 생태계 | 매우 풍부 | 증가 중 | AWS 전용 |
| 학습 곡선 | 중간 | 낮음 (기존 언어 활용) | 낮음 (AWS 경험자) |
| 커뮤니티 | 매우 활발 | 성장 중 | AWS 주도 |
2. 모듈화 및 재사용 전략
2.1 Terraform 모듈 구조
modules/
├── networking/
│ ├── main.tf
│ ├── variables.tf
│ ├── outputs.tf
│ └── versions.tf
├── compute/
│ ├── ec2/
│ ├── ecs/
│ └── lambda/
├── data/
│ ├── rds/
│ ├── dynamodb/
│ └── s3/
└── security/
├── iam/
├── secrets/
└── waf/
2.2 네트워킹 모듈 예제
# modules/networking/main.tf
resource "aws_vpc" "main" {
cidr_block = var.vpc_cidr
enable_dns_hostnames = true
enable_dns_support = true
tags = merge(var.common_tags, {
Name = "${var.name_prefix}-vpc"
})
}
resource "aws_subnet" "public" {
count = length(var.public_subnet_cidrs)
vpc_id = aws_vpc.main.id
cidr_block = var.public_subnet_cidrs[count.index]
availability_zone = data.aws_availability_zones.available.names[count.index]
map_public_ip_on_launch = true
tags = merge(var.common_tags, {
Name = "${var.name_prefix}-public-${count.index + 1}"
Type = "public"
})
}
resource "aws_subnet" "private" {
count = length(var.private_subnet_cidrs)
vpc_id = aws_vpc.main.id
cidr_block = var.private_subnet_cidrs[count.index]
availability_zone = data.aws_availability_zones.available.names[count.index]
tags = merge(var.common_tags, {
Name = "${var.name_prefix}-private-${count.index + 1}"
Type = "private"
})
}
# NAT Gateway for private subnets
resource "aws_eip" "nat" {
count = var.enable_nat_gateway ? length(var.public_subnet_cidrs) : 0
domain = "vpc"
depends_on = [aws_internet_gateway.main]
tags = merge(var.common_tags, {
Name = "${var.name_prefix}-nat-eip-${count.index + 1}"
})
}
resource "aws_nat_gateway" "main" {
count = var.enable_nat_gateway ? length(var.public_subnet_cidrs) : 0
allocation_id = aws_eip.nat[count.index].id
subnet_id = aws_subnet.public[count.index].id
tags = merge(var.common_tags, {
Name = "${var.name_prefix}-nat-${count.index + 1}"
})
depends_on = [aws_internet_gateway.main]
}
2.3 모듈 사용 예제
# environments/production/main.tf
module "networking" {
source = "../../modules/networking"
name_prefix = "prod"
vpc_cidr = "10.0.0.0/16"
public_subnet_cidrs = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
private_subnet_cidrs = ["10.0.11.0/24", "10.0.12.0/24", "10.0.13.0/24"]
enable_nat_gateway = true
common_tags = {
Environment = "production"
Project = "web-app"
Owner = "platform-team"
}
}
module "compute" {
source = "../../modules/compute/ecs"
cluster_name = "prod-web-cluster"
vpc_id = module.networking.vpc_id
subnet_ids = module.networking.private_subnet_ids
common_tags = {
Environment = "production"
Project = "web-app"
}
depends_on = [module.networking]
}
3. 상태 관리 및 백엔드 구성
3.1 Terraform Remote State
# backend.tf
terraform {
backend "s3" {
bucket = "company-terraform-state"
key = "environments/production/terraform.tfstate"
region = "us-west-2"
encrypt = true
dynamodb_table = "terraform-state-lock"
}
}
# S3 버킷 및 DynamoDB 테이블 생성
resource "aws_s3_bucket" "terraform_state" {
bucket = "company-terraform-state"
}
resource "aws_s3_bucket_versioning" "terraform_state" {
bucket = aws_s3_bucket.terraform_state.id
versioning_configuration {
status = "Enabled"
}
}
resource "aws_s3_bucket_server_side_encryption_configuration" "terraform_state" {
bucket = aws_s3_bucket.terraform_state.id
rule {
apply_server_side_encryption_by_default {
sse_algorithm = "AES256"
}
}
}
resource "aws_dynamodb_table" "terraform_state_lock" {
name = "terraform-state-lock"
billing_mode = "PAY_PER_REQUEST"
hash_key = "LockID"
attribute {
name = "LockID"
type = "S"
}
tags = {
Name = "TerraformStateLock"
}
}
3.2 Pulumi 상태 관리
// Pulumi.yaml
name: web-application
runtime: nodejs
backend:
url: s3://company-pulumi-state
// index.ts에서 상태 참조
import * as pulumi from "@pulumi/pulumi";
const stackRef = new pulumi.StackReference("company/networking/production");
const vpcId = stackRef.getOutput("vpcId");
const subnetIds = stackRef.getOutput("privateSubnetIds");
4. 환경별 관리 전략
4.1 Workspace 기반 관리
# Terraform workspace 생성 및 관리
terraform workspace new development
terraform workspace new staging
terraform workspace new production
# 환경별 변수 파일
terraform apply -var-file="environments/development.tfvars"
terraform apply -var-file="environments/production.tfvars"
4.2 환경별 변수 파일
# environments/development.tfvars
environment = "development"
instance_type = "t3.micro"
min_size = 1
max_size = 3
desired_capacity = 1
# environments/production.tfvars
environment = "production"
instance_type = "t3.large"
min_size = 3
max_size = 10
desired_capacity = 5
4.3 조건부 리소스 생성
# 환경에 따른 조건부 생성
resource "aws_cloudwatch_log_group" "app_logs" {
count = var.environment == "production" ? 1 : 0
name = "/aws/application/${var.app_name}"
retention_in_days = 30
}
# 환경별 설정
locals {
environment_config = {
development = {
instance_type = "t3.micro"
replica_count = 1
storage_size = 20
}
staging = {
instance_type = "t3.small"
replica_count = 2
storage_size = 50
}
production = {
instance_type = "t3.large"
replica_count = 3
storage_size = 100
}
}
config = local.environment_config[var.environment]
}
resource "aws_instance" "app" {
count = local.config.replica_count
ami = data.aws_ami.ubuntu.id
instance_type = local.config.instance_type
}
5. 보안 모범 사례
5.1 IAM 역할 기반 접근
# terraform-execution-role.tf
resource "aws_iam_role" "terraform_execution" {
name = "TerraformExecutionRole"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = {
Service = "ec2.amazonaws.com"
}
},
{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = {
AWS = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:user/terraform-ci"
}
Condition = {
StringEquals = {
"sts:ExternalId" = var.external_id
}
}
}
]
})
}
# Permissions policy for the Terraform execution role.
# Statement 1 allows every action of the listed services on every resource.
# Statement 2 denies deleting IAM roles, users and policies when the calling
# principal's Environment tag differs from var.environment.
# NOTE(review): "iam:*" on Resource "*" lets the role grant itself any
# permission, which is at odds with a least-privilege example. Consider
# scoping actions and resource ARNs or attaching a permissions boundary;
# confirm the intended scope before reusing this policy.
resource "aws_iam_policy" "terraform_execution" {
name = "TerraformExecutionPolicy"
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = [
"ec2:*",
"rds:*",
"s3:*",
"iam:*",
"lambda:*",
"apigateway:*",
"cloudformation:*"
]
Resource = "*"
},
{
Effect = "Deny"
Action = [
"iam:DeleteRole",
"iam:DeleteUser",
"iam:DeletePolicy"
]
Resource = "*"
Condition = {
StringNotEquals = {
"aws:PrincipalTag/Environment" = var.environment
}
}
}
]
})
}
5.2 시크릿 관리
# AWS Secrets Manager 활용
resource "aws_secretsmanager_secret" "database_password" {
name = "${var.environment}-database-password"
description = "Database password for ${var.environment}"
recovery_window_in_days = 7
tags = {
Environment = var.environment
Application = var.application_name
}
}
resource "aws_secretsmanager_secret_version" "database_password" {
secret_id = aws_secretsmanager_secret.database_password.id
secret_string = jsonencode({
password = random_password.database.result
})
}
resource "random_password" "database" {
length = 32
special = true
}
# Use the generated secret for the RDS master password
resource "aws_db_instance" "main" {
  identifier        = "${var.environment}-database"
  engine            = "postgres"
  engine_version    = "13.7"
  instance_class    = "db.t3.micro"
  allocated_storage = 20
  storage_encrypted = true

  db_name  = var.database_name
  username = var.database_username

  # `password` and `manage_master_user_password` are mutually exclusive.
  # This example supplies the password from random_password (the same value
  # stored in Secrets Manager), so the RDS-managed option is not set.
  # Alternative: drop `password` and set `manage_master_user_password = true`
  # to let RDS create and rotate the secret, which also keeps the password
  # out of the Terraform state.
  password = random_password.database.result

  vpc_security_group_ids = [aws_security_group.database.id]
  db_subnet_group_name   = aws_db_subnet_group.main.name

  # Production keeps a week of backups and always takes a final snapshot.
  backup_retention_period = var.environment == "production" ? 7 : 1
  backup_window           = "03:00-04:00"
  maintenance_window      = "sun:04:00-sun:05:00"
  skip_final_snapshot     = var.environment != "production"

  tags = {
    Environment = var.environment
  }
}
5.3 리소스 태깅 전략
# Common tag definitions
#
# timestamp() returns a new value on every run, so using it directly in tags
# makes every tagged resource show a change on every plan. time_static (from
# the hashicorp/time provider) records the time once, when it is created, and
# keeps it in state, so the CreatedAt tag stays stable afterwards.
resource "time_static" "created_at" {}

locals {
  common_tags = {
    Environment = var.environment
    Project     = var.project_name
    Owner       = var.team_name
    CostCenter  = var.cost_center
    CreatedBy   = "Terraform"
    CreatedAt   = time_static.created_at.rfc3339
  }
}
# 태그 정책 적용
resource "aws_organizations_policy" "tagging_policy" {
name = "RequiredTaggingPolicy"
description = "Enforce required tags on resources"
type = "TAG_POLICY"
content = jsonencode({
tags = {
Environment = {
tag_key = {
"@@assign" = "Environment"
}
tag_value = {
"@@assign" = ["development", "staging", "production"]
}
enforced_for = {
"@@assign" = ["ec2:instance", "rds:db", "s3:bucket"]
}
}
}
})
}
6. CI/CD 통합
6.1 GitHub Actions 워크플로우
# .github/workflows/terraform.yml
# Validates and plans on pushes and pull requests, scans the code with Checkov,
# and applies the saved plan on pushes to main.
name: Terraform CI/CD

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

env:
  TF_VERSION: 1.5.0
  AWS_REGION: us-west-2

# Assuming a role without static keys uses GitHub's OIDC token, which requires
# `id-token: write`. Declaring any permission sets the unlisted ones to none,
# so checkout (`contents: read`) and the SARIF upload (`security-events: write`)
# are listed explicitly.
permissions:
  id-token: write
  contents: read
  security-events: write

jobs:
  validate:
    name: Validate Terraform
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Terraform Format Check
        run: terraform fmt -check -recursive

      - name: Terraform Init
        run: terraform init

      - name: Terraform Validate
        run: terraform validate

      - name: Terraform Plan
        run: terraform plan -out=tfplan

      # v3 of the artifact actions has been retired; v4 is required.
      - name: Upload Plan
        uses: actions/upload-artifact@v4
        with:
          name: terraform-plan
          path: tfplan

  security-scan:
    name: Security Scan
    runs-on: ubuntu-latest
    needs: validate
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Pinned to a release tag instead of the moving `master` branch so the
      # scanner cannot change underneath the pipeline.
      - name: Run Checkov
        id: checkov
        uses: bridgecrewio/checkov-action@v12
        with:
          directory: .
          framework: terraform
          output_format: sarif
          output_file_path: checkov.sarif

      # Upload findings even when Checkov fails the step.
      - name: Upload SARIF file
        uses: github/codeql-action/upload-sarif@v3
        if: success() || failure()
        with:
          sarif_file: checkov.sarif

  apply:
    name: Apply Terraform
    runs-on: ubuntu-latest
    needs: [validate, security-scan]
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    environment: production
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Download Plan
        uses: actions/download-artifact@v4
        with:
          name: terraform-plan

      - name: Terraform Init
        run: terraform init

      # Applies exactly the plan that was validated and scanned above.
      - name: Terraform Apply
        run: terraform apply -auto-approve tfplan
6.2 GitLab CI 파이프라인
# .gitlab-ci.yml
stages:
- validate
- plan
- security
- apply
variables:
TF_VERSION: "1.5.0"
TF_ROOT: ${CI_PROJECT_DIR}
TF_ADDRESS: ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/terraform/state/${CI_ENVIRONMENT_NAME}
cache:
key: "${TF_ROOT}"
paths:
- ${TF_ROOT}/.terraform
before_script:
- cd ${TF_ROOT}
- gitlab-terraform init
validate:
stage: validate
image:
name: hashicorp/terraform:${TF_VERSION}
entrypoint: [""]
script:
- gitlab-terraform fmt -check
- gitlab-terraform validate
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
plan:
stage: plan
image:
name: hashicorp/terraform:${TF_VERSION}
entrypoint: [""]
script:
- gitlab-terraform plan
- gitlab-terraform plan-json
artifacts:
name: plan
paths:
- ${TF_ROOT}/plan.cache
reports:
terraform: ${TF_ROOT}/plan.json
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
security-scan:
stage: security
image:
name: bridgecrew/checkov:latest
entrypoint: [""]
script:
- checkov -d . --framework terraform --output cli --output junitxml --output-file-path console,checkov.xml
artifacts:
reports:
junit: checkov.xml
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Manual apply of the plan produced by the `plan` job.
# NOTE(review): `gitlab-terraform` is a wrapper script that does not appear to
# be part of the hashicorp/terraform image; confirm the image in use provides it.
apply:
  stage: apply
  image:
    name: hashicorp/terraform:${TF_VERSION}
    entrypoint: [""]
  script:
    - gitlab-terraform apply
  dependencies:
    - plan
  # `rules` and `only` cannot be combined in the same job, so the former
  # `only: [main]` is dropped. The rule below already restricts the job to the
  # default branch and makes it manual.
  rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
      when: manual
7. 테스트 전략
7.1 Terraform 단위 테스트
// tests/networking_test.go
package test
import (
"testing"
"github.com/gruntwork-io/terratest/modules/aws"
"github.com/gruntwork-io/terratest/modules/terraform"
"github.com/stretchr/testify/assert"
)
func TestNetworkingModule(t *testing.T) {
t.Parallel()
terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{
TerraformDir: "../modules/networking",
Vars: map[string]interface{}{
"name_prefix": "test",
"vpc_cidr": "10.0.0.0/16",
"public_subnet_cidrs": []string{"10.0.1.0/24", "10.0.2.0/24"},
"private_subnet_cidrs": []string{"10.0.11.0/24", "10.0.12.0/24"},
"enable_nat_gateway": true,
"common_tags": map[string]string{
"Environment": "test",
"Project": "terratest",
},
},
})
defer terraform.Destroy(t, terraformOptions)
terraform.InitAndApply(t, terraformOptions)
// VPC 검증
vpcId := terraform.Output(t, terraformOptions, "vpc_id")
assert.NotEmpty(t, vpcId)
vpc := aws.GetVpcById(t, vpcId, "us-west-2")
assert.Equal(t, "10.0.0.0/16", *vpc.CidrBlock)
// 서브넷 검증
publicSubnetIds := terraform.OutputList(t, terraformOptions, "public_subnet_ids")
assert.Len(t, publicSubnetIds, 2)
for _, subnetId := range publicSubnetIds {
subnet := aws.GetSubnetById(t, subnetId, "us-west-2")
assert.True(t, *subnet.MapPublicIpOnLaunch)
}
}
7.2 Pulumi 테스트
// tests/networking.test.ts
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
import { expect } from "chai";
import "mocha";
pulumi.runtime.setMocks({
newResource: (args: pulumi.runtime.MockResourceArgs): {id: string, state: any} => {
switch (args.type) {
case "aws:ec2/vpc:Vpc":
return { id: "vpc-12345", state: { cidrBlock: "10.0.0.0/16" } };
case "aws:ec2/subnet:Subnet":
return { id: "subnet-12345", state: { cidrBlock: "10.0.1.0/24" } };
default:
return { id: args.name + "_id", state: args.inputs };
}
},
call: (args: pulumi.runtime.MockCallArgs) => {
return args.inputs;
},
});
describe("Networking Infrastructure", () => {
let infra: typeof import("../index");
before(async () => {
infra = await import("../index");
});
it("should create VPC with correct CIDR", (done) => {
pulumi.all([infra.vpc.cidrBlock]).apply(([cidrBlock]) => {
expect(cidrBlock).to.equal("10.0.0.0/16");
done();
});
});
it("should create public subnets", (done) => {
pulumi.all([infra.publicSubnets]).apply(([subnets]) => {
expect(subnets).to.have.lengthOf(2);
done();
});
});
});
7.3 통합 테스트
# tests/integration/test_full_stack.py
import boto3
import pytest
import time
from moto import mock_ec2, mock_rds
@pytest.fixture
def aws_credentials():
"""Mocked AWS Credentials for moto."""
import os
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
os.environ["AWS_SECURITY_TOKEN"] = "testing"
os.environ["AWS_SESSION_TOKEN"] = "testing"
@mock_ec2
@mock_rds
def test_infrastructure_deployment(aws_credentials):
"""Test complete infrastructure deployment."""
# EC2 클라이언트 생성
ec2 = boto3.client("ec2", region_name="us-west-2")
rds = boto3.client("rds", region_name="us-west-2")
# VPC 생성 테스트
vpc_response = ec2.create_vpc(CidrBlock="10.0.0.0/16")
vpc_id = vpc_response["Vpc"]["VpcId"]
# 서브넷 생성 테스트
subnet_response = ec2.create_subnet(
VpcId=vpc_id,
CidrBlock="10.0.1.0/24"
)
subnet_id = subnet_response["Subnet"]["SubnetId"]
# 보안 그룹 생성 테스트
sg_response = ec2.create_security_group(
GroupName="test-sg",
Description="Test security group",
VpcId=vpc_id
)
sg_id = sg_response["GroupId"]
# 인스턴스 시작 테스트
instances = ec2.run_instances(
ImageId="ami-12345678",
MinCount=1,
MaxCount=1,
InstanceType="t2.micro",
SubnetId=subnet_id,
SecurityGroupIds=[sg_id]
)
instance_id = instances["Instances"][0]["InstanceId"]
# 인스턴스 상태 확인
waiter = ec2.get_waiter("instance_running")
waiter.wait(InstanceIds=[instance_id])
# 검증
instance = ec2.describe_instances(InstanceIds=[instance_id])
assert instance["Reservations"][0]["Instances"][0]["State"]["Name"] == "running"
8. 비용 최적화
8.1 리소스 스케줄링
# 개발 환경 자동 종료
resource "aws_lambda_function" "instance_scheduler" {
filename = "instance_scheduler.zip"
function_name = "instance-scheduler"
role = aws_iam_role.lambda_role.arn
handler = "index.handler"
runtime = "python3.9"
environment {
variables = {
ENVIRONMENT = var.environment
}
}
}
# Schedule that fires on weekday evenings; created only in the development environment.
# NOTE(review): no aws_cloudwatch_event_target is visible in this snippet;
# confirm the rule is wired to the scheduler Lambda elsewhere.
resource "aws_cloudwatch_event_rule" "stop_instances" {
  count       = var.environment == "development" ? 1 : 0
  name        = "stop-dev-instances"
  description = "Stop development instances at 6 PM"

  # EventBridge cron fields: minutes hours day-of-month month day-of-week year.
  # Day-of-month and day-of-week cannot both be specified: when one holds a
  # value or "*", the other must be "?". Rule schedules are evaluated in UTC.
  schedule_expression = "cron(0 18 ? * MON-FRI *)"
}
# Schedule that fires on weekday mornings; created only in the development environment.
# NOTE(review): no aws_cloudwatch_event_target is visible in this snippet;
# confirm the rule is wired to the scheduler Lambda elsewhere.
resource "aws_cloudwatch_event_rule" "start_instances" {
  count       = var.environment == "development" ? 1 : 0
  name        = "start-dev-instances"
  description = "Start development instances at 9 AM"

  # EventBridge cron fields: minutes hours day-of-month month day-of-week year.
  # Day-of-month and day-of-week cannot both be specified: when one holds a
  # value or "*", the other must be "?". Rule schedules are evaluated in UTC.
  schedule_expression = "cron(0 9 ? * MON-FRI *)"
}
8.2 비용 태깅 및 모니터링
# 비용 할당 태그
# Cost category that groups spend by the value of the "Project" tag.
resource "aws_ce_cost_category" "project_costs" {
  name = "ProjectCosts"
  # rule_version is a required argument of aws_ce_cost_category.
  rule_version = "CostCategoryExpression.v1"

  # Each rule matches a single tag condition, so the condition is given
  # directly instead of being wrapped in an `and` with only one operand.
  rule {
    value = "WebApplication"
    rule {
      tag {
        key           = "Project"
        values        = ["web-app"]
        match_options = ["EQUALS"]
      }
    }
  }

  rule {
    value = "DataPipeline"
    rule {
      tag {
        key           = "Project"
        values        = ["data-pipeline"]
        match_options = ["EQUALS"]
      }
    }
  }
}
# 비용 예산 설정
# Monthly cost budget for one environment, with e-mail alerts at 80% of actual
# spend and 100% of forecasted spend.
resource "aws_budgets_budget" "monthly_cost" {
  name         = "${var.environment}-monthly-budget"
  budget_type  = "COST"
  limit_amount = var.monthly_budget_limit
  limit_unit   = "USD"
  time_unit    = "MONTHLY"

  # Tag filters use the `cost_filter` block with the name "TagKeyValue" and
  # values written as "<tag key>$<tag value>" (the legacy `cost_filters` map
  # is not used). format() keeps the literal "$" away from "${" interpolation.
  # NOTE(review): user-defined cost allocation tags are presumably addressed
  # with the "user:" prefix; confirm the tag is activated for cost allocation
  # and that the prefix matches your account's tag naming.
  cost_filter {
    name   = "TagKeyValue"
    values = [format("user:Environment$%s", var.environment)]
  }

  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 80
    threshold_type             = "PERCENTAGE"
    notification_type          = "ACTUAL"
    subscriber_email_addresses = [var.budget_notification_email]
  }

  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 100
    threshold_type             = "PERCENTAGE"
    notification_type          = "FORECASTED"
    subscriber_email_addresses = [var.budget_notification_email]
  }
}
9. 모니터링 및 관찰가능성
9.1 Terraform 드리프트 감지
# scripts/drift_detection.py
import boto3
import json
import subprocess
import sys
from datetime import datetime
class TerraformDriftDetector:
    """Compare EC2 instances recorded in a Terraform state file with AWS.

    The state file is read from S3; every managed ``aws_instance`` in it is
    looked up with ``describe_instances`` and its instance type and tags are
    compared with the values stored in the state.
    """

    def __init__(self, state_bucket, state_key, region):
        """Store the state location and create the S3 client.

        Args:
            state_bucket: Name of the S3 bucket holding the state file.
            state_key: Object key of the state file inside the bucket.
            region: AWS region used for the S3 and EC2 clients.
        """
        self.state_bucket = state_bucket
        self.state_key = state_key
        self.region = region
        self.s3_client = boto3.client('s3', region_name=region)

    def get_terraform_state(self):
        """Extract the managed EC2 instances from the Terraform state file.

        Returns:
            A list of dicts with keys ``type`` (always ``'ec2_instance'``),
            ``id`` and ``terraform_config`` (the instance's attribute dict).
            An empty list is returned when the state cannot be read or parsed;
            the error is printed.
        """
        # NOTE(review): a read failure is indistinguishable from "no instances"
        # for callers, so generate_report() will report no drift in that case.
        try:
            response = self.s3_client.get_object(
                Bucket=self.state_bucket,
                Key=self.state_key
            )
            state_content = response['Body'].read()
            state_data = json.loads(state_content)
            resources = []
            for resource in state_data.get('resources', []):
                if resource['type'] != 'aws_instance':
                    continue
                # Data sources share the resource type but carry mode "data";
                # only resources Terraform actually manages can drift.
                if resource.get('mode', 'managed') != 'managed':
                    continue
                for instance in resource['instances']:
                    resources.append({
                        'type': 'ec2_instance',
                        'id': instance['attributes']['id'],
                        'terraform_config': instance['attributes']
                    })
            return resources
        except Exception as e:
            print(f"Error reading Terraform state: {e}")
            return []

    def get_aws_resources(self, resource_type, resource_ids):
        """Look up the actual state of the given resources in AWS.

        Args:
            resource_type: Kind of resource; only ``'ec2_instance'`` is handled.
            resource_ids: List of resource IDs to describe.

        Returns:
            A dict keyed by instance ID with ``instance_type``, ``state``,
            ``security_groups`` and ``tags``. An empty dict is returned for
            unsupported resource types and when the lookup fails (the error
            is printed).
        """
        if resource_type != 'ec2_instance':
            # Always hand back a dict so callers can do membership tests.
            return {}
        ec2 = boto3.client('ec2', region_name=self.region)
        try:
            response = ec2.describe_instances(InstanceIds=resource_ids)
            actual_resources = {}
            for reservation in response['Reservations']:
                for instance in reservation['Instances']:
                    actual_resources[instance['InstanceId']] = {
                        'instance_type': instance['InstanceType'],
                        'state': instance['State']['Name'],
                        'security_groups': [sg['GroupId'] for sg in instance['SecurityGroups']],
                        'tags': {tag['Key']: tag['Value'] for tag in instance.get('Tags', [])}
                    }
            return actual_resources
        except Exception as e:
            print(f"Error describing instances: {e}")
            return {}

    def detect_drift(self):
        """Detect differences between the Terraform state and AWS.

        Returns:
            A list of drift records. Each record has ``resource_id`` and
            ``drift_type`` (``MISSING``, ``CONFIGURATION`` or
            ``TAG_MISMATCH``); ``MISSING`` records carry ``description``, the
            others carry ``attribute``, ``terraform_value`` and ``aws_value``.
        """
        terraform_resources = self.get_terraform_state()
        drift_report = []
        for tf_resource in terraform_resources:
            resource_type = tf_resource['type']
            resource_id = tf_resource['id']
            tf_config = tf_resource['terraform_config']
            if resource_type != 'ec2_instance':
                continue

            aws_resources = self.get_aws_resources(resource_type, [resource_id])
            if resource_id not in aws_resources:
                drift_report.append({
                    'resource_id': resource_id,
                    'drift_type': 'MISSING',
                    'description': f'Resource {resource_id} exists in Terraform but not in AWS'
                })
                continue
            aws_resource = aws_resources[resource_id]

            # Instance type check
            if tf_config.get('instance_type') != aws_resource.get('instance_type'):
                drift_report.append({
                    'resource_id': resource_id,
                    'drift_type': 'CONFIGURATION',
                    'attribute': 'instance_type',
                    'terraform_value': tf_config.get('instance_type'),
                    'aws_value': aws_resource.get('instance_type')
                })

            # Tag check. The state stores `"tags": null` for untagged
            # instances, so a present-but-None value must also map to {}.
            tf_tags = tf_config.get('tags') or {}
            aws_tags = aws_resource.get('tags') or {}
            for key, value in tf_tags.items():
                if key not in aws_tags or aws_tags[key] != value:
                    drift_report.append({
                        'resource_id': resource_id,
                        'drift_type': 'TAG_MISMATCH',
                        'attribute': f'tag:{key}',
                        'terraform_value': value,
                        'aws_value': aws_tags.get(key, 'NOT_SET')
                    })
        return drift_report

    def generate_report(self):
        """Print a drift report.

        Returns:
            True when no drift was detected, False otherwise.
        """
        drift_issues = self.detect_drift()
        if not drift_issues:
            print("✅ No configuration drift detected")
            return True
        print("⚠️ Configuration drift detected:")
        print("=" * 50)
        for issue in drift_issues:
            print(f"Resource: {issue['resource_id']}")
            print(f"Type: {issue['drift_type']}")
            if 'attribute' in issue:
                print(f"Attribute: {issue['attribute']}")
                print(f"Terraform: {issue['terraform_value']}")
                print(f"AWS: {issue['aws_value']}")
            else:
                print(f"Description: {issue['description']}")
            print("-" * 30)
        return False
if __name__ == "__main__":
detector = TerraformDriftDetector(
state_bucket="company-terraform-state",
state_key="environments/production/terraform.tfstate",
region="us-west-2"
)
success = detector.generate_report()
sys.exit(0 if success else 1)
9.2 CloudWatch 대시보드
# monitoring/cloudwatch.tf
resource "aws_cloudwatch_dashboard" "infrastructure" {
dashboard_name = "${var.environment}-infrastructure"
dashboard_body = jsonencode({
widgets = [
{
type = "metric"
x = 0
y = 0
width = 12
height = 6
properties = {
metrics = [
["AWS/EC2", "CPUUtilization", "AutoScalingGroupName", aws_autoscaling_group.web.name],
["AWS/ApplicationELB", "TargetResponseTime", "LoadBalancer", aws_lb.web.arn_suffix],
["AWS/RDS", "DatabaseConnections", "DBInstanceIdentifier", aws_db_instance.main.id]
]
view = "timeSeries"
stacked = false
region = var.aws_region
title = "Infrastructure Metrics"
period = 300
}
},
{
type = "log"
x = 0
y = 6
width = 24
height = 6
properties = {
query = <<-EOT
SOURCE '/aws/lambda/infrastructure-monitor'
| fields @timestamp, @message
| filter @message like /ERROR/
| sort @timestamp desc
| limit 20
EOT
region = var.aws_region
title = "Infrastructure Errors"
view = "table"
}
}
]
})
}
# 알람 설정
resource "aws_cloudwatch_metric_alarm" "high_cpu" {
alarm_name = "${var.environment}-high-cpu-utilization"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/EC2"
period = "300"
statistic = "Average"
threshold = "80"
alarm_description = "This metric monitors ec2 cpu utilization"
alarm_actions = [aws_sns_topic.alerts.arn]
dimensions = {
AutoScalingGroupName = aws_autoscaling_group.web.name
}
}
10. 대규모 환경 관리
10.1 계층형 모듈 구조
infrastructure/
├── modules/
│ ├── foundation/
│ │ ├── networking/
│ │ ├── security/
│ │ └── monitoring/
│ ├── platform/
│ │ ├── compute/
│ │ ├── data/
│ │ └── messaging/
│ └── applications/
│ ├── web-app/
│ ├── api-gateway/
│ └── microservices/
├── environments/
│ ├── development/
│ ├── staging/
│ └── production/
└── shared/
├── terraform.tf
├── variables.tf
└── outputs.tf
10.2 의존성 관리
# environments/production/main.tf
module "foundation" {
source = "../../modules/foundation"
environment = "production"
vpc_cidr = "10.0.0.0/16"
availability_zones = ["us-west-2a", "us-west-2b", "us-west-2c"]
common_tags = local.common_tags
}
module "platform" {
source = "../../modules/platform"
vpc_id = module.foundation.vpc_id
private_subnet_ids = module.foundation.private_subnet_ids
public_subnet_ids = module.foundation.public_subnet_ids
database_subnet_group = module.foundation.database_subnet_group_name
cache_subnet_group = module.foundation.cache_subnet_group_name
common_tags = local.common_tags
depends_on = [module.foundation]
}
# A module `source` must be a literal string: it is resolved by `terraform init`,
# before any expression such as each.key can be evaluated. Each application
# module therefore gets its own block with a fixed source path.
# No explicit depends_on is needed; referencing module.platform outputs already
# orders these modules after the platform layer.
# NOTE(review): assumes var.applications is keyed by the directory names under
# modules/applications (web-app, api-gateway, microservices); confirm.
module "web_app" {
  source             = "../../modules/applications/web-app"
  vpc_id             = module.foundation.vpc_id
  subnet_ids         = module.foundation.private_subnet_ids
  load_balancer_arn  = module.platform.application_load_balancer_arn
  database_endpoint  = module.platform.database_endpoint
  cache_endpoint     = module.platform.cache_endpoint
  application_config = var.applications["web-app"]
  common_tags        = local.common_tags
}

module "api_gateway" {
  source             = "../../modules/applications/api-gateway"
  vpc_id             = module.foundation.vpc_id
  subnet_ids         = module.foundation.private_subnet_ids
  load_balancer_arn  = module.platform.application_load_balancer_arn
  database_endpoint  = module.platform.database_endpoint
  cache_endpoint     = module.platform.cache_endpoint
  application_config = var.applications["api-gateway"]
  common_tags        = local.common_tags
}

module "microservices" {
  source             = "../../modules/applications/microservices"
  vpc_id             = module.foundation.vpc_id
  subnet_ids         = module.foundation.private_subnet_ids
  load_balancer_arn  = module.platform.application_load_balancer_arn
  database_endpoint  = module.platform.database_endpoint
  cache_endpoint     = module.platform.cache_endpoint
  application_config = var.applications["microservices"]
  common_tags        = local.common_tags
}
10.3 멀티 리전 배포
# multi-region/main.tf
locals {
  regions = ["us-west-2", "us-east-1", "eu-west-1"]
}

# Provider configurations are bound statically, so the `providers` argument
# cannot be computed from each.value. Each region gets its own module block
# that names its aliased provider directly. The root module must declare the
# aliases aws.us_west_2, aws.us_east_1 and aws.eu_west_1.
module "regional_us_west_2" {
  source      = "./modules/regional"
  region      = "us-west-2"
  environment = var.environment

  # Global settings
  global_tags = var.global_tags

  # Per-region settings
  primary_region = true

  # Cross-region replication targets: every other region
  replication_regions = [for r in local.regions : r if r != "us-west-2"]

  providers = {
    aws = aws.us_west_2
  }
}

module "regional_us_east_1" {
  source      = "./modules/regional"
  region      = "us-east-1"
  environment = var.environment

  global_tags         = var.global_tags
  primary_region      = false
  replication_regions = [for r in local.regions : r if r != "us-east-1"]

  providers = {
    aws = aws.us_east_1
  }
}

module "regional_eu_west_1" {
  source      = "./modules/regional"
  region      = "eu-west-1"
  environment = var.environment

  global_tags         = var.global_tags
  primary_region      = false
  replication_regions = [for r in local.regions : r if r != "eu-west-1"]

  providers = {
    aws = aws.eu_west_1
  }
}

# Region name => module instance, so the resources below can still iterate.
locals {
  regional_infrastructure = {
    "us-west-2" = module.regional_us_west_2
    "us-east-1" = module.regional_us_east_1
    "eu-west-1" = module.regional_eu_west_1
  }
}

# Route 53 health checks, one per regional load balancer.
# The cloudwatch_alarm_* and insufficient_data_health_status arguments apply
# to CLOUDWATCH_METRIC health checks only, so they are not set on this
# endpoint (HTTPS) check.
resource "aws_route53_health_check" "regional" {
  for_each = local.regional_infrastructure

  fqdn              = each.value.load_balancer_dns
  port              = 443
  type              = "HTTPS"
  resource_path     = "/health"
  failure_threshold = "3"
  request_interval  = "30"

  tags = {
    Name = "HealthCheck-${each.key}"
  }
}

# One record per region. Failover routing takes exactly one PRIMARY and one
# SECONDARY record per name and type, which cannot represent three regions.
# Latency routing combined with the health checks sends clients to the
# closest healthy region and skips regions whose check is failing. The
# resource name is kept so existing references stay valid.
resource "aws_route53_record" "failover" {
  for_each = local.regional_infrastructure

  zone_id        = aws_route53_zone.main.zone_id
  name           = var.domain_name
  type           = "A"
  set_identifier = each.key

  latency_routing_policy {
    region = each.key
  }

  alias {
    name                   = each.value.load_balancer_dns
    zone_id                = each.value.load_balancer_zone_id
    evaluate_target_health = true
  }

  health_check_id = aws_route53_health_check.regional[each.key].id
}
11. 트러블슈팅 가이드
11.1 일반적인 문제 해결
#!/bin/bash
# scripts/terraform_troubleshoot.sh
echo "Terraform 트러블슈팅 도구"
echo "========================"
# 상태 파일 검사
# Verify that the state backend is reachable and that every address in the
# state can be displayed. Exits the script with status 1 when the state
# cannot be listed.
check_state_file() {
    echo "상태 파일 검사 중..."
    if terraform state list > /dev/null 2>&1; then
        echo "✅ 상태 파일 접근 가능"
        # Orphaned resource check.
        # `terraform show` expects a state or plan file path; a single
        # resource address is inspected with `terraform state show`.
        # `read -r` keeps backslashes in addresses intact.
        terraform state list | while IFS= read -r resource; do
            if ! terraform state show "$resource" > /dev/null 2>&1; then
                echo "⚠️ 고아 리소스 발견: $resource"
            fi
        done
    else
        echo "❌ 상태 파일 접근 불가"
        echo "백엔드 설정을 확인하세요"
        exit 1
    fi
}
# 의존성 검사
check_dependencies() {
echo "의존성 검사 중..."
# Provider 버전 검사
terraform version
# 초기화 상태 검사
if [ ! -d ".terraform" ]; then
echo "⚠️ Terraform이 초기화되지 않았습니다"
echo "terraform init을 실행하세요"
fi
# 플러그인 검사
terraform providers
}
# 구성 검증
validate_configuration() {
echo "구성 검증 중..."
# 형식 검사
terraform fmt -check -recursive
# 구성 검증
terraform validate
# 계획 검사
if terraform plan -detailed-exitcode > /dev/null 2>&1; then
echo "✅ 계획 검사 통과"
else
exit_code=$?
case $exit_code in
1)
echo "❌ 계획 실패 - 구성 오류"
;;
2)
echo "ℹ️ 계획 성공 - 변경 사항 있음"
;;
esac
fi
}
# 리소스 상태 확인
# Run `terraform plan -detailed-exitcode` and print the affected resources
# when the plan reports pending changes (exit code 2).
check_resource_drift() {
    echo "리소스 드리프트 검사 중..."
    local plan_output exit_code
    # A unique temp file avoids clobbering a file in the working directory
    # and collisions between concurrent runs.
    plan_output=$(mktemp) || return 1
    terraform plan -detailed-exitcode > "$plan_output" 2>&1
    exit_code=$?
    if [ "$exit_code" -eq 2 ]; then
        echo "⚠️ 구성 드리프트 감지됨"
        echo "변경 사항:"
        grep -A 5 -B 5 "will be" "$plan_output"
    elif [ "$exit_code" -eq 1 ]; then
        # The plan itself failed: surface Terraform's output instead of
        # silently reporting nothing.
        cat "$plan_output" >&2
    fi
    rm -f "$plan_output"
}
# 메인 실행
main() {
check_state_file
echo ""
check_dependencies
echo ""
validate_configuration
echo ""
check_resource_drift
}
main "$@"
11.2 성능 최적화
# terraform/performance.tf
# Language version constraint.
# Optional object type attributes became a stable language feature in
# Terraform 1.3; the `module_variable_optional_attrs` experiment has concluded
# and opting in to it is rejected by newer releases, so only a minimum version
# is required. This block does not affect parallelism: concurrency is set
# with the CLI flag `-parallelism=N` on plan/apply.
terraform {
  required_version = ">= 1.3.0"
}
# Provider 별칭을 통한 병렬 처리
provider "aws" {
alias = "us_west_2"
region = "us-west-2"
}
provider "aws" {
alias = "us_east_1"
region = "us-east-1"
}
# 데이터 소스 최적화
data "aws_availability_zones" "available" {
state = "available"
# 필터링으로 성능 향상
filter {
name = "zone-type"
values = ["availability-zone"]
}
}
# Regional buckets.
# The `provider` meta-argument must be a static reference to a provider
# configuration; it cannot be built from each.key. One resource block per
# region is declared instead. The two blocks do not depend on each other,
# so Terraform can create them concurrently.
resource "aws_s3_bucket" "regional_us_west_2" {
  provider = aws.us_west_2
  bucket   = "company-us-west-2-${random_string.suffix.result}"
}

resource "aws_s3_bucket" "regional_us_east_1" {
  provider = aws.us_east_1
  bucket   = "company-us-east-1-${random_string.suffix.result}"
}
# Security groups and their ingress rules.
# Inline `ingress` blocks that point at another instance of the same resource
# make aws_security_group.this depend on itself, which Terraform reports as a
# cycle. The groups are therefore created without inline rules and every rule
# becomes a separate aws_security_group_rule.
locals {
  security_groups = {
    web = {
      name_prefix = "web-sg"
      ingress = [
        { from_port = 80, to_port = 80, protocol = "tcp", cidr_blocks = ["0.0.0.0/0"] },
        { from_port = 443, to_port = 443, protocol = "tcp", cidr_blocks = ["0.0.0.0/0"] }
      ]
    }
    app = {
      name_prefix = "app-sg"
      ingress = [
        { from_port = 8080, to_port = 8080, protocol = "tcp", security_groups = ["web"] }
      ]
    }
  }

  # One entry per rule, keyed "<group>-<index>", remembering the owning group.
  ingress_rules = merge([
    for sg_key, sg in local.security_groups : {
      for idx, rule in sg.ingress :
      "${sg_key}-${idx}" => merge(rule, { group = sg_key })
    }
  ]...)
}

resource "aws_security_group" "this" {
  for_each    = local.security_groups
  name_prefix = each.value.name_prefix
  vpc_id      = aws_vpc.main.id
}

resource "aws_security_group_rule" "ingress" {
  for_each = local.ingress_rules

  type              = "ingress"
  security_group_id = aws_security_group.this[each.value.group].id
  from_port         = each.value.from_port
  to_port           = each.value.to_port
  protocol          = each.value.protocol

  # A rule has either CIDR sources or a source security group. try() yields
  # null when the rule has no `security_groups` attribute. As in the inline
  # version, only the first listed source group is used.
  cidr_blocks              = lookup(each.value, "cidr_blocks", null)
  source_security_group_id = try(aws_security_group.this[each.value.security_groups[0]].id, null)
}
12. 결론
Infrastructure as Code는 현대적인 클라우드 인프라 관리의 핵심입니다. 2026년 현재, 다음과 같은 트렌드가 주목받고 있습니다:
주요 발전 방향
1. 선언적 구성의 진화
   - GitOps 워크플로우 통합
   - 정책 기반 관리 (OPA, Sentinel)
   - 지속적 드리프트 감지
2. 멀티 클라우드 전략
   - 클라우드 중립적 추상화
   - 하이브리드 인프라 관리
   - 비용 최적화 자동화
3. 보안 강화
   - 시크릿 제로 접근 방식
   - 런타임 보안 정책
   - 컴플라이언스 자동화
4. 관찰가능성 확대
   - 인프라 메트릭 통합
   - 예측적 장애 감지
   - 비용 투명성 향상
구현 권장사항
# 프로젝트 체크리스트
infrastructure_checklist:
planning:
- [ ] 도구 선택 및 평가
- [ ] 모듈 설계 및 구조화
- [ ] 환경별 전략 수립
implementation:
- [ ] 코드 품질 표준 정의
- [ ] CI/CD 파이프라인 구축
- [ ] 테스트 전략 수립
operations:
- [ ] 모니터링 및 알림 설정
- [ ] 백업 및 복구 계획
- [ ] 드리프트 감지 자동화
governance:
- [ ] 보안 정책 적용
- [ ] 비용 관리 체계
- [ ] 컴플라이언스 검증
이 가이드를 통해 현대적이고 확장 가능한 Infrastructure as Code 환경을 구축하고, 지속적으로 발전하는 클라우드 생태계에 효과적으로 대응할 수 있을 것입니다.