fix(openrouter): use model-specific request modalities for image generation

2026-04-08 07:49:30 +02:00
parent d145cebe75
commit 1bc0611ce3
2 changed files with 84 additions and 1 deletions
--- a/convex/openrouter.ts
+++ b/convex/openrouter.ts
@@ -10,8 +10,12 @@ export interface OpenRouterModel {
  /** Gleiche Einheit wie UI „Cr“ / lib/ai-models creditCost */
  creditCost: number;
  minTier: "free" | "starter" | "pro" | "max";
+  requestModalities?: readonly ("image" | "text")[];
 }

+const IMAGE_AND_TEXT_MODALITIES = ["image", "text"] as const; // fallback when a model entry sets no requestModalities
+const IMAGE_ONLY_MODALITIES = ["image"] as const; // for model entries whose requests send only the "image" modality
+
 export const IMAGE_MODELS: Record<string, OpenRouterModel> = {
  "google/gemini-2.5-flash-image": {
    id: "google/gemini-2.5-flash-image",
@@ -28,6 +32,7 @@ export const IMAGE_MODELS: Record<string, OpenRouterModel> = {
    estimatedCostPerImage: 2,
    creditCost: 2,
    minTier: "free",
+    requestModalities: IMAGE_ONLY_MODALITIES,
  },
  "bytedance-seed/seedream-4.5": {
    id: "bytedance-seed/seedream-4.5",
@@ -36,6 +41,7 @@ export const IMAGE_MODELS: Record<string, OpenRouterModel> = {
    estimatedCostPerImage: 5,
    creditCost: 5,
    minTier: "free",
+    requestModalities: IMAGE_ONLY_MODALITIES,
  },
  "google/gemini-3.1-flash-image-preview": {
    id: "google/gemini-3.1-flash-image-preview",
@@ -60,6 +66,7 @@ export const IMAGE_MODELS: Record<string, OpenRouterModel> = {
    estimatedCostPerImage: 9,
    creditCost: 9,
    minTier: "starter",
+    requestModalities: IMAGE_ONLY_MODALITIES,
  },
  "sourceful/riverflow-v2-pro": {
    id: "sourceful/riverflow-v2-pro",
@@ -68,6 +75,7 @@ export const IMAGE_MODELS: Record<string, OpenRouterModel> = {
    estimatedCostPerImage: 12,
    creditCost: 12,
    minTier: "starter",
+    requestModalities: IMAGE_ONLY_MODALITIES,
  },
  "google/gemini-3-pro-image-preview": {
    id: "google/gemini-3-pro-image-preview",
@@ -156,6 +164,8 @@ export async function generateImageViaOpenRouter(
  params: GenerateImageParams
 ): Promise<OpenRouterImageResponse> {
  const modelId = params.model ?? DEFAULT_IMAGE_MODEL;
+  const model = IMAGE_MODELS[modelId];
+  const requestModalities = model?.requestModalities ?? IMAGE_AND_TEXT_MODALITIES;
  const requestStartedAt = Date.now();

  console.info("[openrouter] request start", {
@@ -188,7 +198,7 @@ export async function generateImageViaOpenRouter(

  const body: Record<string, unknown> = {
    model: modelId,
-    modalities: ["image", "text"],
+    modalities: [...requestModalities],
    messages: [userMessage],
  };

--- a/tests/convex/openrouter.test.ts
+++ b/tests/convex/openrouter.test.ts
@@ -0,0 +1,73 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import { generateImageViaOpenRouter } from "@/convex/openrouter";
+
+/**
+ * Builds a minimal stand-in for a successful `fetch` response.
+ *
+ * The object exposes `ok: true`, `status: 200` and a mocked `json()` that
+ * resolves to a payload with a single choice whose message carries one image
+ * given as a base64 PNG data URL.
+ *
+ * @returns The stub, cast to `Response`.
+ */
+function createOpenRouterSuccessResponse(): Response {
+  const imageDataUrl = "data:image/png;base64,ZmFrZV9pbWFnZQ==";
+
+  // A new payload object is produced on every json() call.
+  const readJson = vi.fn(async () => {
+    const image = { image_url: { url: imageDataUrl } };
+    const message = { images: [image] };
+    return { choices: [{ message }] };
+  });
+
+  const stub = { ok: true, status: 200, json: readJson };
+
+  // Only the three members above exist, hence the double cast.
+  return stub as unknown as Response;
+}
+
+/**
+ * Issues one image-generation request for `model` through the mocked `fetch`
+ * and returns the `modalities` array found in that request's JSON body.
+ *
+ * Safe to call several times on the same mock without resetting it: each call
+ * inspects the request it triggered itself, not an earlier one.
+ *
+ * @param fetchMock - The mock installed as the global `fetch`.
+ * @param model - Model id forwarded to `generateImageViaOpenRouter`.
+ * @returns The `modalities` field of the request body, or `undefined` if the
+ *   body has no such field.
+ * @throws Error if no `fetch` call was recorded for this request or its body
+ *   is not a string.
+ */
+async function runRequestAndReadModalities(fetchMock: ReturnType<typeof vi.fn>, model: string) {
+  fetchMock.mockResolvedValueOnce(createOpenRouterSuccessResponse());
+
+  // Remember how many calls the mock has already seen. Reading calls[0]
+  // unconditionally would keep returning the first request whenever this
+  // helper runs more than once between mock resets.
+  const callsBeforeRequest = fetchMock.mock.calls.length;
+
+  await generateImageViaOpenRouter("test-api-key", {
+    model,
+    prompt: "draw a fox",
+  });
+
+  const requestCallArgs = fetchMock.mock.calls[callsBeforeRequest];
+  const init = requestCallArgs?.[1] as RequestInit | undefined;
+  const bodyRaw = init?.body;
+  if (typeof bodyRaw !== "string") {
+    // Fail with a readable message instead of a JSON.parse("") SyntaxError.
+    throw new Error(`no string request body was sent to fetch for model "${model}"`);
+  }
+
+  const body = JSON.parse(bodyRaw) as { modalities?: string[] };
+  return body.modalities;
+}
+
+/**
+ * Checks the `modalities` value that ends up in the request body sent by
+ * `generateImageViaOpenRouter`, with the global `fetch` replaced by a mock.
+ */
+describe("openrouter request body", () => {
+  const fetchMock = vi.fn<typeof fetch>();
+
+  // Every test starts from a clean mock installed as the global fetch ...
+  beforeEach(() => {
+    fetchMock.mockReset();
+    vi.stubGlobal("fetch", fetchMock);
+  });
+
+  // ... and the global stub is removed again afterwards.
+  afterEach(() => {
+    vi.unstubAllGlobals();
+  });
+
+  it("uses image+text modalities for Gemini Flash Image", async () => {
+    const modalities = await runRequestAndReadModalities(
+      fetchMock,
+      "google/gemini-2.5-flash-image",
+    );
+
+    expect(modalities).toEqual(["image", "text"]);
+  });
+
+  it("uses image-only modalities for text+image->image models", async () => {
+    const imageOnlyModelIds = [
+      "black-forest-labs/flux.2-klein-4b",
+      "bytedance-seed/seedream-4.5",
+      "sourceful/riverflow-v2-fast",
+      "sourceful/riverflow-v2-pro",
+    ] as const;
+
+    // One request per model, issued sequentially on the shared mock.
+    for (const modelId of imageOnlyModelIds) {
+      const modalities = await runRequestAndReadModalities(fetchMock, modelId);
+      expect(modalities).toEqual(["image"]);
+    }
+  });
+});