fix(billing): never block a lone execution on usage headroom

waleedlatif1 · waleedlatif1 · commit ac565253a43e · 2026-06-10T08:55:32.000-07:00
The admission reservation tapers allowed concurrency by remaining usage
headroom. With under one credit of headroom left (but not yet over the
cap), floor(headroom / estimate) hit zero and rejected even a single
execution with nothing else in flight. That was stricter than the
recorded-usage gate, which would have allowed that last run, and it
surfaced a misleading "too many concurrent executions" message. Clamp the
headroom term to a minimum of 1 so a lone execution is governed only by
the recorded-usage gate; concurrency above the first slot still tapers
with headroom.
diff --git a/apps/sim/app/api/chat/utils.test.ts b/apps/sim/app/api/chat/utils.test.ts
@@ -19,21 +19,13 @@ const {
   mockSetDeploymentAuthCookie,
   mockIsEmailAllowed,
   mockGetSession,
-  mockCheckRateLimitDirect,
 } = vi.hoisted(() => ({
   mockMergeSubblockStateWithValues: vi.fn().mockReturnValue({}),
   mockMergeSubBlockValues: vi.fn().mockReturnValue({}),
   mockValidateAuthToken: vi.fn().mockReturnValue(false),
   mockSetDeploymentAuthCookie: vi.fn(),
   mockIsEmailAllowed: vi.fn(),
   mockGetSession: vi.fn(),
-  mockCheckRateLimitDirect: vi.fn().mockResolvedValue({ allowed: true }),
-}))
-
-vi.mock('@/lib/core/rate-limiter', () => ({
-  RateLimiter: vi.fn().mockImplementation(() => ({
-    checkRateLimitDirect: mockCheckRateLimitDirect,
-  })),
 }))
 
 vi.mock('@/lib/auth', () => ({
@@ -157,7 +149,6 @@ describe('Chat API Utils', () => {
   describe('Chat auth validation', () => {
     beforeEach(() => {
       mockDecryptSecret.mockResolvedValue({ decrypted: 'correct-password' })
-      mockCheckRateLimitDirect.mockResolvedValue({ allowed: true })
     })
 
     it('should allow access to public chats', async () => {
@@ -244,32 +235,6 @@ describe('Chat API Utils', () => {
       expect(result.error).toBe('Invalid password')
     })
 
-    it('should return 429 when the password attempt rate limit is exceeded', async () => {
-      mockCheckRateLimitDirect.mockResolvedValueOnce({ allowed: false, retryAfterMs: 60_000 })
-
-      const deployment = {
-        id: 'chat-id',
-        authType: 'password',
-        password: 'encrypted-password',
-      }
-
-      const mockRequest = {
-        method: 'POST',
-        cookies: {
-          get: vi.fn().mockReturnValue(null),
-        },
-      } as any
-
-      const result = await validateChatAuth('request-id', deployment, mockRequest, {
-        password: 'any-guess',
-      })
-
-      expect(result.authorized).toBe(false)
-      expect(result.status).toBe(429)
-      expect(result.retryAfterMs).toBe(60_000)
-      expect(decryptSecret).not.toHaveBeenCalled()
-    })
-
     it('should request email auth for email-protected chats', async () => {
       const deployment = {
         id: 'chat-id',
diff --git a/apps/sim/app/api/chat/utils.ts b/apps/sim/app/api/chat/utils.ts
@@ -1,34 +1,18 @@
 import { db } from '@sim/db'
 import { chat, workflow } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
-import { safeCompare } from '@sim/security/compare'
 import { authorizeWorkflowByWorkspacePermission } from '@sim/workflow-authz'
 import { and, eq, isNull } from 'drizzle-orm'
 import type { NextRequest, NextResponse } from 'next/server'
-import type { TokenBucketConfig } from '@/lib/core/rate-limiter'
-import { RateLimiter } from '@/lib/core/rate-limiter'
 import {
   isEmailAllowed,
   setDeploymentAuthCookie,
   validateAuthToken,
 } from '@/lib/core/security/deployment'
 import { decryptSecret } from '@/lib/core/security/encryption'
-import { getClientIp } from '@/lib/core/utils/request'
 
 const logger = createLogger('ChatAuthUtils')
 
-const rateLimiter = new RateLimiter()
-
-/**
- * Throttles unauthenticated password guesses per client IP against a single
- * deployment, mirroring the OTP/SSO IP limits.
- */
-const PASSWORD_IP_RATE_LIMIT: TokenBucketConfig = {
-  maxTokens: 10,
-  refillRate: 10,
-  refillIntervalMs: 15 * 60_000,
-}
-
 export function setChatAuthCookie(
   response: NextResponse,
   chatId: string,
@@ -104,7 +88,7 @@ export async function validateChatAuth(
   deployment: any,
   request: NextRequest,
   parsedBody?: any
-): Promise<{ authorized: boolean; error?: string; status?: number; retryAfterMs?: number }> {
+): Promise<{ authorized: boolean; error?: string }> {
   const authType = deployment.authType || 'public'
 
   if (authType === 'public') {
@@ -145,25 +129,8 @@ export async function validateChatAuth(
         return { authorized: false, error: 'Authentication configuration error' }
       }
 
-      const ip = getClientIp(request)
-      const ipRateLimit = await rateLimiter.checkRateLimitDirect(
-        `chat-password:ip:${deployment.id}:${ip}`,
-        PASSWORD_IP_RATE_LIMIT
-      )
-      if (!ipRateLimit.allowed) {
-        logger.warn(
-          `[${requestId}] Password attempt IP rate limit exceeded for chat ${deployment.id} from ${ip}`
-        )
-        return {
-          authorized: false,
-          error: 'Too many attempts. Please try again later.',
-          status: 429,
-          retryAfterMs: ipRateLimit.retryAfterMs ?? PASSWORD_IP_RATE_LIMIT.refillIntervalMs,
-        }
-      }
-
       const { decrypted } = await decryptSecret(deployment.password)
-      if (!safeCompare(password, decrypted)) {
+      if (password !== decrypted) {
         return { authorized: false, error: 'Invalid password' }
       }
 
diff --git a/apps/sim/lib/billing/calculations/usage-reservation.ts b/apps/sim/lib/billing/calculations/usage-reservation.ts
@@ -32,8 +32,12 @@ const MAX_CONCURRENT_EXECUTIONS: Record<SubscriptionPlan, number> = {
 /**
  * Per-slot reserved cost estimate (dollars). The guaranteed-minimum charge
  * every execution incurs, used to taper admission as recorded usage approaches
- * the cap: an entity may hold at most `floor(headroom / estimate)` slots, so
- * `recordedUsage + reservedSlots * estimate <= limit` always holds.
+ * the cap: an entity may hold at most `max(1, floor(headroom / estimate))`
+ * concurrent slots, so `recordedUsage + reservedSlots * estimate <= limit`
+ * holds for any admission beyond the first slot. A lone execution is never
+ * blocked on headroom alone — the recorded-usage gate (`isExceeded`) governs
+ * that case, so the only residual overshoot is the one already inherent to
+ * admission (cost is unknown until the execution finishes).
  */
 const SLOT_COST_ESTIMATE = BASE_EXECUTION_CHARGE
 
@@ -48,8 +52,10 @@ const POINTER_KEY_PREFIX = 'usage:reservation:'
  * and the remaining usage headroom permit it, then record the in-flight slot.
  *
  * Prune expired members (crash safety) -> `count = ZCARD` -> reject when
- * `count >= min(maxConcurrency, headroomSlots)` -> otherwise `ZADD` the slot,
- * refresh the set TTL, and write the per-execution pointer for release.
+ * `count >= min(maxConcurrency, max(1, headroomSlots))` -> otherwise `ZADD` the
+ * slot, refresh the set TTL, and write the per-execution pointer for release.
+ * The `max(1, ...)` floor guarantees a lone execution is never blocked on
+ * headroom alone; concurrency above the first slot still tapers with headroom.
  */
 const RESERVE_SCRIPT = `
 local now = tonumber(ARGV[1])
@@ -59,6 +65,7 @@ local headroomSlots = tonumber(ARGV[4])
 local pttl = tonumber(ARGV[7])
 redis.call('ZREMRANGEBYSCORE', KEYS[1], '-inf', now)
 local count = redis.call('ZCARD', KEYS[1])
+if headroomSlots < 1 then headroomSlots = 1 end
 local allowed = maxConcurrency
 if headroomSlots < allowed then allowed = headroomSlots end
 if count >= allowed then