gRPC Best Practices for Microservices

May 11, 2020

gRPC has become a popular choice for internal service communication. It’s efficient, type-safe, and supports streaming. But like any technology, using it well requires understanding its patterns and pitfalls.

Here’s how to use gRPC effectively in production.

Why gRPC

Compared to REST/JSON

REST/JSON:
- Text-based serialization (larger payloads)
- Schema separate from implementation (can drift)
- HTTP/1.1 typically (connection overhead)
- Flexible but less structured

gRPC:
- Binary serialization (Protocol Buffers)
- Schema is the source of truth
- HTTP/2 (multiplexed connections)
- Strongly typed, code generated

When to Use gRPC

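Good fit:
- Internal service-to-service communication
- Latency- or bandwidth-sensitive calls where compact binary payloads matter
- Streaming workloads (server, client, or bidirectional)
- Polyglot environments that benefit from generated, strongly typed clients

Less ideal:
- Browser-facing APIs (browsers need gRPC-Web or a translating proxy)
- Public APIs whose consumers expect plain REST/JSON
- Cases where human-readable payloads and simple tooling such as curl matter more than efficiency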

Protocol Buffer Design

Message Design

// user.proto
syntax = "proto3";
package user.v1;

import "google/protobuf/timestamp.proto";

// User describes a single user account.
message User {
  string id = 1;
  string email = 2;
  string display_name = 3;
  // Creation time, expressed with the well-known Timestamp type imported above.
  google.protobuf.Timestamp created_at = 4;
  // Account state; see the UserStatus enum.
  UserStatus status = 5;

  // Nested message for address
  Address address = 6;

  // Repeated field for tags
  repeated string tags = 7;

  // Reserved fields for removed/deprecated
  // Both the old field numbers and the old field name are reserved so that
  // a later edit cannot accidentally reuse them.
  reserved 8, 9;
  reserved "legacy_field";
}

enum UserStatus {
  USER_STATUS_UNSPECIFIED = 0;
  USER_STATUS_ACTIVE = 1;
  USER_STATUS_SUSPENDED = 2;
  USER_STATUS_DELETED = 3;
}

message Address {
  string street = 1;
  string city = 2;
  string country = 3;
  string postal_code = 4;
}

Naming Conventions

// Package: lowercase, dot-separated
package mycompany.users.v1;

// Messages: PascalCase
message UserAccount { ... }

// Fields: snake_case
string user_name = 1;

// Enums: SCREAMING_SNAKE_CASE
enum Status {
  STATUS_UNSPECIFIED = 0;  // Always have zero value
  STATUS_ACTIVE = 1;
}

// Services: PascalCase
service UserService { ... }

// Methods: PascalCase
rpc GetUser(GetUserRequest) returns (GetUserResponse);

Service Design

// user_service.proto
// UserService demonstrates the four gRPC call shapes: unary, server
// streaming, client streaming, and bidirectional streaming.
service UserService {
  // Unary: request -> response
  rpc GetUser(GetUserRequest) returns (GetUserResponse);
  rpc CreateUser(CreateUserRequest) returns (CreateUserResponse);
  rpc UpdateUser(UpdateUserRequest) returns (UpdateUserResponse);
  rpc DeleteUser(DeleteUserRequest) returns (DeleteUserResponse);

  // Server streaming: request -> stream of responses
  // NOTE(review): ListUsersRequest carries page_size/page_token, which is
  // unusual next to a streamed response, and this method returns User
  // directly rather than a per-method response message — confirm which
  // pagination model is intended.
  rpc ListUsers(ListUsersRequest) returns (stream User);

  // Client streaming: stream of requests -> response
  rpc BatchCreateUsers(stream CreateUserRequest) returns (BatchCreateUsersResponse);

  // Bidirectional streaming
  rpc SyncUsers(stream SyncRequest) returns (stream SyncResponse);
}

// Request/Response messages per method
message GetUserRequest {
  string user_id = 1;
}

message GetUserResponse {
  User user = 1;
}

message ListUsersRequest {
  int32 page_size = 1;
  string page_token = 2;
  string filter = 3;
}

API Versioning

Package-Based Versioning

// v1/user.proto
package mycompany.users.v1;

service UserService {
  rpc GetUser(GetUserRequest) returns (GetUserResponse);
}

// v2/user.proto
package mycompany.users.v2;

service UserService {
  rpc GetUser(GetUserRequest) returns (GetUserResponse);
  // New method in v2
  rpc GetUserProfile(GetUserProfileRequest) returns (GetUserProfileResponse);
}

Backward Compatibility Rules

// SAFE changes (backward compatible):
// - Add new fields (with new field numbers)
// - Add new methods to service
// - Add new enum values (not zero)
// - Mark fields as reserved

// BREAKING changes (not backward compatible):
// - Remove or rename fields
// - Change field types
// - Change field numbers
// - Remove methods
// - Change method signatures

Error Handling

Status Codes

// Use appropriate gRPC status codes
import "google.golang.org/grpc/codes"
import "google.golang.org/grpc/status"

// GetUser fetches the user named by req.UserId from the database and
// translates lookup failures into gRPC status errors.
//
// ErrNotFound maps to codes.NotFound and ErrInvalidID maps to
// codes.InvalidArgument. Any other failure is logged on the server and
// reported to the caller as a generic codes.Internal, so internal details
// are never leaked over the wire.
func (s *server) GetUser(ctx context.Context, req *pb.GetUserRequest) (*pb.GetUserResponse, error) {
    found, lookupErr := s.db.GetUser(ctx, req.UserId)

    switch {
    case lookupErr == nil:
        return &pb.GetUserResponse{User: found}, nil
    case errors.Is(lookupErr, ErrNotFound):
        return nil, status.Error(codes.NotFound, "user not found")
    case errors.Is(lookupErr, ErrInvalidID):
        return nil, status.Error(codes.InvalidArgument, "invalid user ID format")
    default:
        // Keep the real cause in the server log only.
        log.Error("database error", "error", lookupErr)
        return nil, status.Error(codes.Internal, "internal error")
    }
}

Rich Error Details

// Use google.rpc.Status for detailed errors
import "google/rpc/status.proto";
import "google/rpc/error_details.proto";
// Return structured error details
import "google.golang.org/genproto/googleapis/rpc/errdetails"

// validationError builds an InvalidArgument status error that carries a
// BadRequest detail naming the offending field and describing the problem.
//
// field is the name of the request field that failed validation and
// description explains why. The returned error is always non-nil: if the
// detail cannot be attached, the plain InvalidArgument status is returned
// instead of silently losing the error.
func validationError(field, description string) error {
    st := status.New(codes.InvalidArgument, "validation failed")

    br := &errdetails.BadRequest{
        FieldViolations: []*errdetails.BadRequest_FieldViolation{
            {Field: field, Description: description},
        },
    }

    // WithDetails returns a nil *Status on failure, and Err() on a nil
    // *Status is nil. Overwriting st unconditionally would therefore turn
    // a validation failure into a success.
    detailed, err := st.WithDetails(br)
    if err != nil {
        return st.Err()
    }
    return detailed.Err()
}

Interceptors (Middleware)

Server Interceptors

// Unary interceptor
// loggingInterceptor is a unary server interceptor that times each call and,
// once the handler returns, logs the full method name, the elapsed duration,
// and the resulting error. The handler's response and error are passed
// through unchanged.
func loggingInterceptor(
    ctx context.Context,
    req interface{},
    info *grpc.UnaryServerInfo,
    handler grpc.UnaryHandler,
) (interface{}, error) {
    began := time.Now()
    reply, callErr := handler(ctx, req)
    elapsed := time.Since(began)

    log.Info("gRPC call", "method", info.FullMethod, "duration", elapsed, "error", callErr)
    return reply, callErr
}

// Stream interceptor
// streamLoggingInterceptor is a stream server interceptor that times the
// whole lifetime of a streaming call and logs the full method name, the
// elapsed duration, and the handler's final error, matching the fields the
// unary logging interceptor records. The handler's error is returned
// unchanged.
func streamLoggingInterceptor(
    srv interface{},
    ss grpc.ServerStream,
    info *grpc.StreamServerInfo,
    handler grpc.StreamHandler,
) error {
    start := time.Now()
    err := handler(srv, ss)
    log.Info("gRPC stream",
        "method", info.FullMethod,
        "duration", time.Since(start),
        "error", err, // previously omitted, which hid failed streams in the logs
    )
    return err
}

// Apply interceptors
// Build the server with a chain of unary interceptors and a chain of stream
// interceptors installed.
// NOTE(review): grpc-go documents the first interceptor in a chain as the
// outermost wrapper — confirm that listing recoveryInterceptor last (so it
// does not wrap loggingInterceptor or authInterceptor) is intended.
// NOTE(review): only logging is applied to streams here; confirm whether
// streaming RPCs also need auth and recovery interceptors.
server := grpc.NewServer(
    grpc.ChainUnaryInterceptor(
        loggingInterceptor,
        authInterceptor,
        recoveryInterceptor,
    ),
    grpc.ChainStreamInterceptor(
        streamLoggingInterceptor,
    ),
)

Client Interceptors

// Client-side interceptor for retries, tracing, etc.
conn, err := grpc.Dial(
    address,
    grpc.WithChainUnaryInterceptor(
        clientLoggingInterceptor,
        retryInterceptor,
    ),
)

Load Balancing

Client-Side Load Balancing

// Register resolver and balancer
import _ "google.golang.org/grpc/balancer/roundrobin"

conn, err := grpc.Dial(
    "dns:///my-service.default.svc.cluster.local:50051",
    grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy":"round_robin"}`),
)

Service Mesh Integration

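With Istio/Linkerd, the sidecar proxy balances individual gRPC requests across backends, so the client can simply dial the service name and leave load balancing to the mesh: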

# Istio destination rule for gRPC
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
  name: user-service
spec:
  host: user-service
  trafficPolicy:
    loadBalancer:
      simple: ROUND_ROBIN
    connectionPool:
      http:
        h2UpgradePolicy: UPGRADE  # Ensure HTTP/2

Health Checking

gRPC Health Protocol

// Standard health check service
service Health {
  rpc Check(HealthCheckRequest) returns (HealthCheckResponse);
  rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}
// Implement health server
import "google.golang.org/grpc/health"
import "google.golang.org/grpc/health/grpc_health_v1"

healthServer := health.NewServer()
grpc_health_v1.RegisterHealthServer(server, healthServer)

// Update health status
healthServer.SetServingStatus("user.v1.UserService", grpc_health_v1.HealthCheckResponse_SERVING)

Kubernetes Integration

# Use grpc-health-probe
containers:
  - name: app
    livenessProbe:
      exec:
        command: ["/bin/grpc_health_probe", "-addr=:50051"]
      initialDelaySeconds: 5
    readinessProbe:
      exec:
        command: ["/bin/grpc_health_probe", "-addr=:50051"]
      initialDelaySeconds: 5

Deadlines and Timeouts

Always Set Deadlines

// Client: always set deadline
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()

resp, err := client.GetUser(ctx, &pb.GetUserRequest{UserId: "123"})
if err != nil {
    if status.Code(err) == codes.DeadlineExceeded {
        // Handle timeout
    }
}

// Server: check remaining time
// GetUser (deadline-aware variant) rejects the request up front when the
// caller's context has a deadline with less than 100ms remaining, returning
// codes.DeadlineExceeded instead of starting work that is unlikely to finish
// in time. A context without a deadline skips the check.
func (s *server) GetUser(ctx context.Context, req *pb.GetUserRequest) (*pb.GetUserResponse, error) {
    // ok is false when the caller set no deadline at all.
    deadline, ok := ctx.Deadline()
    if ok && time.Until(deadline) < 100*time.Millisecond {
        return nil, status.Error(codes.DeadlineExceeded, "insufficient time")
    }
    // ... process request
    // (placeholder: the actual handling and the final return are elided in this snippet)
}

Propagate Context

// Context propagates deadlines and cancellation
// GetUserWithOrders combines the results of the user service and the order
// service for req.UserId into a single response.
//
// The incoming ctx is handed to both downstream calls, so each one inherits
// the caller's deadline and cancellation. The first downstream error is
// returned to the caller as-is and the remaining call is skipped.
func (s *server) GetUserWithOrders(ctx context.Context, req *pb.Request) (*pb.Response, error) {
    // Look up the user first, under the caller's deadline.
    userReq := &userpb.GetUserRequest{UserId: req.UserId}
    userResp, userErr := s.userClient.GetUser(ctx, userReq)
    if userErr != nil {
        return nil, userErr
    }

    // Then fetch the orders, under the same deadline.
    ordersReq := &orderpb.ListOrdersRequest{UserId: req.UserId}
    ordersResp, ordersErr := s.orderClient.ListOrders(ctx, ordersReq)
    if ordersErr != nil {
        return nil, ordersErr
    }

    combined := &pb.Response{User: userResp, Orders: ordersResp}
    return combined, nil
}

Testing

Unit Testing

// TestGetUser exercises the GetUser handler as a plain method call against a
// mock database, with no network or gRPC transport involved, and checks that
// the returned user carries the requested ID.
func TestGetUser(t *testing.T) {
    svc := &server{db: newMockDB()}
    request := &pb.GetUserRequest{UserId: "123"}

    got, err := svc.GetUser(context.Background(), request)

    require.NoError(t, err)
    assert.Equal(t, "123", got.User.Id)
}

Integration Testing

func TestGetUserIntegration(t *testing.T) {
    // Start real server
    lis, _ := net.Listen("tcp", ":0")
    s := grpc.NewServer()
    pb.RegisterUserServiceServer(s, &server{})
    go s.Serve(lis)
    defer s.Stop()

    // Connect client
    conn, _ := grpc.Dial(lis.Addr().String(), grpc.WithInsecure())
    defer conn.Close()
    client := pb.NewUserServiceClient(conn)

    // Test
    resp, err := client.GetUser(context.Background(), &pb.GetUserRequest{
        UserId: "123",
    })
    require.NoError(t, err)
    assert.NotNil(t, resp.User)
}

Key Takeaways

gRPC provides efficiency and type safety. Use it for internal communication where those benefits matter most.