// aslobot-matrix/src/services/ai/ai.ts

import { GoogleGenAI } from "@google/genai";
import { config } from "../../config.js";
import type {
    AIResponseImage,
    AIResponseText,
    AIToolMatrixData,
} from "./types.js";
import type { IAIInstructions } from "../../modules/ai/types.js";
import { FunctionCallingConfigMode } from "@google/genai";
import { toolFunctions, tools } from "./tools.js";
import type { FunctionResponse } from "@google/genai";
import type { Content } from "@google/genai";

/** Shared Gemini client for this module, keyed with the API key from the app config. */
const googleAI = new GoogleGenAI({ apiKey: config.app.ai.api.key });

/** Model used for both requests issued by `getTextGemini`. */
const TEXT_MODEL = "gemini-3-flash-preview";

/**
 * Requests a text answer from Gemini, running locally registered tools when
 * the model asks for them.
 *
 * Flow:
 * 1. Send the (optional) previous input and the current input to the model.
 * 2. If the model answered with text, or requested no tool, return that answer.
 * 3. Otherwise execute every requested tool, send all results back in a single
 *    follow-up turn and return the model's final text.
 *
 * Only one round of tool calls is performed; a function call contained in the
 * follow-up response is not executed.
 *
 * @param matrixData - Context handed unchanged to each tool function.
 * @param instructions - Serialized with `JSON.stringify` and sent as the system instruction.
 * @param input - The current user message.
 * @param oldInput - Previous user message; sent as an earlier user turn when non-empty.
 * @param inputImage - Optional image attached to the current message (sent as `image/png`).
 * @returns The answer text (`"AI Error"` when the model returned no text) and
 *          the total token count summed over all requests made.
 */
const getTextGemini = async (
    matrixData: AIToolMatrixData,
    instructions: IAIInstructions,
    input: string,
    oldInput?: string,
    inputImage?: Buffer<ArrayBuffer>,
): Promise<AIResponseText> => {
    const inputContent: Content = {
        role: "user",
        parts: [
            { text: input },
            ...(inputImage
                ? [
                      {
                          inlineData: {
                              mimeType: "image/png",
                              data: inputImage.toString("base64"),
                          },
                      },
                  ]
                : []),
        ],
    };

    // The previous input is only materialized when there is one to send.
    const contents: Content[] = oldInput
        ? [{ role: "user", parts: [{ text: oldInput }] }, inputContent]
        : [inputContent];

    // Identical for the initial request and the tool follow-up.
    const requestConfig = {
        systemInstruction: JSON.stringify(instructions),
        toolConfig: {
            functionCallingConfig: {
                mode: FunctionCallingConfigMode.AUTO,
            },
        },
        tools: [{ functionDeclarations: tools }],
    };

    const response = await googleAI.models.generateContent({
        model: TEXT_MODEL,
        contents: contents,
        config: requestConfig,
    });

    const tokens = response.usageMetadata?.totalTokenCount ?? 0;
    const responseText = response.text;

    const content = response.candidates?.at(0)?.content;
    // Scan every part: the function call is not guaranteed to be the first
    // part, and the model may request several calls in the same turn.
    const functionCalls = (content?.parts ?? []).flatMap((part) =>
        part.functionCall ? [part.functionCall] : [],
    );

    // A textual answer takes precedence over any requested tool call.
    if (responseText || !content || functionCalls.length === 0) {
        return {
            text: responseText ?? "AI Error",
            tokens: tokens,
        };
    }

    // Resolve every tool up front so nothing is executed when one is unknown.
    const resolvedCalls = functionCalls.map((call) => ({
        call,
        tool: toolFunctions.find((candidate) => candidate.name === call.name)
            ?.function,
    }));

    const unresolved = resolvedCalls.find(({ tool }) => !tool);
    if (unresolved) {
        return {
            text: `Calling function ${unresolved.call.name}`,
            tokens: tokens,
        };
    }

    const functionResponses: FunctionResponse[] = [];
    for (const { call, tool } of resolvedCalls) {
        if (!tool) {
            // Unreachable after the check above; narrows the type only.
            continue;
        }

        // Awaited so that asynchronous tools are serialized by value instead
        // of as a pending promise; a no-op for synchronous tools. Calls run
        // sequentially because tools share `matrixData`.
        const output = await tool(matrixData, call.args);
        functionResponses.push({
            id: call.id ?? "",
            name: call.name ?? "",
            response: {
                output: JSON.stringify(output),
            },
        });
    }

    const responseTool = await googleAI.models.generateContent({
        model: TEXT_MODEL,
        contents: [
            ...contents,
            // The model's own turn is echoed back untouched so that the
            // function-call parts stay exactly as the model produced them.
            content,
            {
                // Function responses are sent as a "user" turn, as shown in
                // the Gemini function-calling documentation.
                role: "user",
                parts: functionResponses.map((functionResponse) => ({
                    functionResponse: functionResponse,
                })),
            },
        ],
        config: requestConfig,
    });

    return {
        text: responseTool.text ?? "AI Error",
        tokens: tokens + (responseTool.usageMetadata?.totalTokenCount ?? 0),
    };
};

/**
 * Generates an image from a text prompt with the Gemini image model.
 *
 * @param input - Prompt forwarded as the request contents.
 * @returns The decoded image taken from the first candidate's inline-data
 *          parts (the last part carrying data wins; `undefined` when there is
 *          none) and the total token count (0 when usage metadata is absent).
 */
const getImageGemini = async (input: string): Promise<AIResponseImage> => {
    const result = await googleAI.models.generateContent({
        model: "gemini-2.5-flash-image",
        contents: input,
    });

    const candidateParts = result.candidates?.[0]?.content?.parts ?? [];

    let image: Buffer<ArrayBuffer> | undefined;
    for (const { inlineData } of candidateParts) {
        // Parts without inline data, or with empty data, are skipped.
        const encoded = inlineData?.data;
        if (encoded) {
            image = Buffer.from(encoded, "base64");
        }
    }

    return {
        image,
        tokens: result.usageMetadata?.totalTokenCount ?? 0,
    };
};

// Public API of the Gemini service module.
export { getImageGemini, getTextGemini };