Add production health checks
This commit is contained in:
+96
-3
@@ -12,8 +12,9 @@ import nodemailer, { type SendMailOptions } from 'nodemailer';
|
||||
import Stripe from 'stripe';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { db } from './db/client.js';
|
||||
import { ensureSchema } from './db/schema.js';
|
||||
import { adoptionReportQueueEvents, enqueueAdoptionReportJob } from './queues/adoptionReportQueue.js';
|
||||
import { adoptionReportQueueEvents, enqueueAdoptionReportJob, getAdoptionReportQueueCounts } from './queues/adoptionReportQueue.js';
|
||||
import { enqueueBirdMilestoneReminderJob, getBirdMilestoneReminderQueueCounts } from './queues/birdMilestoneReminderQueue.js';
|
||||
import { enqueueMedicationReminderJob, getMedicationReminderQueueCounts } from './queues/medicationReminderQueue.js';
|
||||
import {
|
||||
@@ -2265,6 +2266,59 @@ const ensureBirdWritable = (bird: BirdRow, res: Response) => {
|
||||
return false;
|
||||
};
|
||||
|
||||
/** Outcome of probing a single dependency for the health endpoints. */
type HealthCheckResult = {
  /** `true` when the probe completed without throwing. */
  ok: boolean;
  /** Milliseconds elapsed between starting the probe and obtaining its outcome. */
  latencyMs?: number;
  /** Failure description; only set when the probe failed. */
  error?: string;
};
|
||||
|
||||
/**
 * Races `operation` against a timer so a hung dependency cannot stall a health probe.
 *
 * @param operation - The already-started promise to wait for.
 * @param timeoutMs - Maximum wait in milliseconds (defaults to 2000).
 * @returns The value `operation` resolves with, if it settles first.
 * @throws Error with message `Health check timed out` when the timer wins the race;
 *   otherwise whatever `operation` rejects with.
 */
const withHealthTimeout = async <T,>(operation: Promise<T>, timeoutMs = 2_000): Promise<T> => {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error('Health check timed out'));
    }, timeoutMs);
  });

  try {
    return await Promise.race([operation, deadline]);
  } finally {
    // Always cancel the timer so a settled check leaves no pending handle behind.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
};
|
||||
|
||||
/**
 * Probes Postgres by running `SELECT 1` through `db` under the health-check timeout.
 *
 * Never rejects: any failure (including a timeout) is folded into the returned result.
 *
 * @returns `ok: true` with the measured latency on success; otherwise `ok: false`
 *   with the latency and the error message (or a generic fallback for non-Error throws).
 */
const checkPostgresHealth = async (): Promise<HealthCheckResult> => {
  const begin = Date.now();
  const elapsedMs = (): number => Date.now() - begin;

  try {
    await withHealthTimeout(db.query('SELECT 1'));
    return { ok: true, latencyMs: elapsedMs() };
  } catch (cause) {
    const message = cause instanceof Error ? cause.message : 'Postgres health check failed';
    return { ok: false, latencyMs: elapsedMs(), error: message };
  }
};
|
||||
|
||||
/**
 * Probes Redis by requesting the bird-milestone reminder queue's job counts under the
 * health-check timeout (presumably that queue is Redis-backed; confirm in the queue module).
 *
 * Never rejects: any failure (including a timeout) is folded into the returned result.
 *
 * @returns `ok: true` with the measured latency on success; otherwise `ok: false`
 *   with the latency and the error message (or a generic fallback for non-Error throws).
 */
const checkRedisHealth = async (): Promise<HealthCheckResult> => {
  const begin = Date.now();
  const elapsedMs = (): number => Date.now() - begin;

  try {
    await withHealthTimeout(getBirdMilestoneReminderQueueCounts());
    return { ok: true, latencyMs: elapsedMs() };
  } catch (cause) {
    const message = cause instanceof Error ? cause.message : 'Redis health check failed';
    return { ok: false, latencyMs: elapsedMs(), error: message };
  }
};
|
||||
|
||||
const writeAuditLog = async (
|
||||
auth: AuthContext,
|
||||
action: string,
|
||||
@@ -2293,8 +2347,46 @@ const isBillingOnlyWorkspaceUpdate = (
|
||||
payload: z.infer<typeof workspaceSchema>,
|
||||
) => workspace.workspace_type === 'standard' && payload.workspaceType === 'standard' && payload.name === workspace.name;
|
||||
|
||||
app.get('/api/health', (_req: Request, res: Response) => {
|
||||
res.json({ ok: true });
|
||||
// Liveness probe: answers from process state alone, without calling any dependency.
app.get('/api/health/live', (_req: Request, res: Response) => {
  const uptimeSeconds = Math.round(process.uptime());
  const checkedAt = new Date().toISOString();

  res.json({
    ok: true,
    service: 'flockpal-backend',
    status: 'live',
    uptimeSeconds,
    checkedAt,
  });
});
|
||||
|
||||
/**
 * Readiness handler shared by `/api/health/ready` and `/api/health`.
 *
 * Runs the Postgres and Redis probes concurrently and responds 200 with status `ready`
 * when both succeed, or 503 with status `degraded` otherwise. The per-dependency results
 * are included under `dependencies`. The two routes previously carried identical copies
 * of this body; sharing one handler keeps them from drifting apart.
 */
const sendReadinessReport = async (_req: Request, res: Response): Promise<void> => {
  const [postgres, redis] = await Promise.all([checkPostgresHealth(), checkRedisHealth()]);
  const ok = postgres.ok && redis.ok;

  res.status(ok ? 200 : 503).json({
    ok,
    service: 'flockpal-backend',
    status: ok ? 'ready' : 'degraded',
    checkedAt: new Date().toISOString(),
    dependencies: {
      postgres,
      redis,
    },
  });
};

app.get('/api/health/ready', sendReadinessReport);

// `/api/health` serves the same readiness report as `/api/health/ready`.
app.get('/api/health', sendReadinessReport);
|
||||
|
||||
app.get('/api/metrics', requireAuth, requireAdmin, async (_req: Request, res: Response, next: NextFunction) => {
|
||||
@@ -2326,6 +2418,7 @@ app.get('/api/metrics', requireAuth, requireAdmin, async (_req: Request, res: Re
|
||||
queues: {
|
||||
birdMilestoneReminders: birdMilestoneReminderQueueCounts,
|
||||
medicationReminders: medicationReminderQueueCounts,
|
||||
adoptionReports: await getAdoptionReportQueueCounts(),
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
import { db } from './db/client.js';
|
||||
import { closeBirdMilestoneReminderQueue, getBirdMilestoneReminderQueueCounts } from './queues/birdMilestoneReminderQueue.js';
|
||||
|
||||
/** Timeout applied when `HEALTHCHECK_TIMEOUT_MS` is unset or unusable. */
const DEFAULT_HEALTHCHECK_TIMEOUT_MS = 5_000;

/**
 * Parses the raw `HEALTHCHECK_TIMEOUT_MS` value into a usable timer delay.
 *
 * An empty string converts to 0 and non-numeric text to NaN; either would make the
 * timeout timer fire almost immediately and fail every check. Such values (and
 * non-positive or non-finite ones) fall back to the default instead.
 *
 * @param raw - The environment variable value, if set.
 * @returns A finite, positive timeout in milliseconds.
 */
const resolveTimeoutMs = (raw: string | undefined): number => {
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_HEALTHCHECK_TIMEOUT_MS;
};

const timeoutMs = resolveTimeoutMs(process.env.HEALTHCHECK_TIMEOUT_MS);
|
||||
|
||||
/**
 * Races `operation` against a timer of `timeoutMs` milliseconds.
 *
 * @param operation - The already-started promise to wait for.
 * @param label - Name used in the timeout error message.
 * @returns The value `operation` resolves with, if it settles first.
 * @throws Error with message `<label> timed out` when the timer wins the race;
 *   otherwise whatever `operation` rejects with.
 */
const withTimeout = async <T>(operation: Promise<T>, label: string): Promise<T> => {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${label} timed out`));
    }, timeoutMs);
  });

  try {
    return await Promise.race([operation, deadline]);
  } finally {
    // Cancel the timer so it cannot keep the process alive after the race settles.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
};
|
||||
|
||||
/**
 * Requests `path` from the API on the loopback interface and fails on a non-2xx status.
 *
 * The port comes from `PORT` (default `5000`). The request is tied to an AbortController
 * that is always aborted on the way out: when the timeout wins the race, the in-flight
 * request is cancelled instead of lingering and keeping the process alive after the check
 * has already been reported as failed.
 *
 * @param path - Absolute URL path to probe, e.g. `/api/health/ready`.
 * @throws Error `<path> timed out` on timeout, `<path> returned <status>` on a non-OK
 *   response, or the underlying fetch error.
 */
const checkHttp = async (path: string) => {
  const port = process.env.PORT ?? '5000';
  const controller = new AbortController();

  try {
    const response = await withTimeout(
      fetch(`http://127.0.0.1:${port}${path}`, { signal: controller.signal }),
      path,
    );

    if (!response.ok) {
      throw new Error(`${path} returned ${response.status}`);
    }
  } finally {
    // Cancels a still-pending request (timeout case) and releases the unread body otherwise.
    controller.abort();
  }
};
|
||||
|
||||
/**
 * Verifies the dependencies a worker needs, one after another: a `SELECT 1` against `db`
 * (labelled `postgres`), then the bird-milestone reminder queue counts (labelled `redis`).
 * Each probe is only started once the previous one has succeeded.
 *
 * @throws The first probe failure, or `<label> timed out` if a probe exceeds the timeout.
 */
const checkWorkerDependencies = async () => {
  const probes: Array<[label: string, start: () => Promise<unknown>]> = [
    ['postgres', () => db.query('SELECT 1')],
    ['redis', () => getBirdMilestoneReminderQueueCounts()],
  ];

  for (const [label, start] of probes) {
    await withTimeout(start(), label);
  }
};
|
||||
|
||||
// Script entry point: the first CLI argument selects which check runs (default `api-ready`).
const mode = process.argv[2] ?? 'api-ready';

// Map lookup rather than a plain object so inherited keys such as `toString` never match.
const modeChecks = new Map<string, () => Promise<void>>([
  ['api-live', () => checkHttp('/api/health/live')],
  ['api-ready', () => checkHttp('/api/health/ready')],
  ['worker', () => checkWorkerDependencies()],
]);

try {
  const runCheck = modeChecks.get(mode);

  if (runCheck === undefined) {
    throw new Error(`Unknown healthcheck mode: ${mode}`);
  }

  await runCheck();
} catch (failure) {
  // Report the reason and signal failure through the exit code.
  console.error(failure instanceof Error ? failure.message : failure);
  process.exitCode = 1;
} finally {
  // Best-effort shutdown of the queue and database handles, whatever the outcome.
  await Promise.allSettled([closeBirdMilestoneReminderQueue(), db.close()]);
}
|
||||
@@ -40,3 +40,5 @@ export const closeAdoptionReportQueue = async () => {
|
||||
await adoptionReportQueue.close();
|
||||
await adoptionReportQueueEvents.close();
|
||||
};
|
||||
|
||||
/**
 * Returns the adoption-report queue's job counts for the `waiting`, `active`, `delayed`,
 * `completed` and `failed` states, as reported by `adoptionReportQueue.getJobCounts`.
 */
export const getAdoptionReportQueueCounts = () => {
  return adoptionReportQueue.getJobCounts('waiting', 'active', 'delayed', 'completed', 'failed');
};
|
||||
|
||||
Reference in New Issue
Block a user