diff --git a/.changeset/poor-rings-admire.md b/.changeset/poor-rings-admire.md new file mode 100644 index 00000000000..1b63c0138d0 --- /dev/null +++ b/.changeset/poor-rings-admire.md @@ -0,0 +1,6 @@ +--- +'firebase': minor +'@firebase/ai': minor +--- + +Added support for the URL context tool, which allows the model to access content from provided public web URLs to inform and enhance its responses. diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md index c1b570a7e05..a9187215465 100644 --- a/common/api-review/ai.api.md +++ b/common/api-review/ai.api.md @@ -448,6 +448,10 @@ export interface GenerateContentCandidate { index: number; // (undocumented) safetyRatings?: SafetyRating[]; + // Warning: (ae-incompatible-release-tags) The symbol "urlContextMetadata" is marked as @public, but its signature references "URLContextMetadata" which is marked as @beta + // + // (undocumented) + urlContextMetadata?: URLContextMetadata; } // @public @@ -596,6 +600,8 @@ export interface GoogleAIGenerateContentCandidate { index: number; // (undocumented) safetyRatings?: SafetyRating[]; + // (undocumented) + urlContextMetadata?: URLContextMetadata; } // Warning: (ae-internal-missing-underscore) The name "GoogleAIGenerateContentResponse" should be prefixed with an underscore because the declaration is marked as @internal @@ -1300,8 +1306,10 @@ export interface ThinkingConfig { thinkingBudget?: number; } +// Warning: (ae-incompatible-release-tags) The symbol "Tool" is marked as @public, but its signature references "URLContextTool" which is marked as @beta +// // @public -export type Tool = FunctionDeclarationsTool | GoogleSearchTool | CodeExecutionTool; +export type Tool = FunctionDeclarationsTool | GoogleSearchTool | CodeExecutionTool | URLContextTool; // @public export interface ToolConfig { @@ -1312,6 +1320,38 @@ export interface ToolConfig { // @public export type TypedSchema = IntegerSchema | NumberSchema | StringSchema | BooleanSchema | ObjectSchema | ArraySchema 
| AnyOfSchema; +// @beta +export interface URLContext { +} + +// @beta +export interface URLContextMetadata { + urlMetadata: URLMetadata[]; +} + +// @beta +export interface URLContextTool { + urlContext: URLContext; +} + +// @beta +export interface URLMetadata { + retrievedUrl?: string; + urlRetrievalStatus?: URLRetrievalStatus; +} + +// @beta +export const URLRetrievalStatus: { + URL_RETRIEVAL_STATUS_UNSPECIFIED: string; + URL_RETRIEVAL_STATUS_SUCCESS: string; + URL_RETRIEVAL_STATUS_ERROR: string; + URL_RETRIEVAL_STATUS_PAYWALL: string; + URL_RETRIEVAL_STATUS_UNSAFE: string; +}; + +// @beta +export type URLRetrievalStatus = (typeof URLRetrievalStatus)[keyof typeof URLRetrievalStatus]; + // @public export interface UsageMetadata { // (undocumented) @@ -1323,6 +1363,8 @@ export interface UsageMetadata { // (undocumented) promptTokensDetails?: ModalityTokenCount[]; thoughtsTokenCount?: number; + toolUsePromptTokenCount?: number; + toolUsePromptTokensDetails?: ModalityTokenCount[]; // (undocumented) totalTokenCount: number; } diff --git a/docs-devsite/_toc.yaml b/docs-devsite/_toc.yaml index 364d5b992c9..04d65f6c333 100644 --- a/docs-devsite/_toc.yaml +++ b/docs-devsite/_toc.yaml @@ -202,6 +202,14 @@ toc: path: /docs/reference/js/ai.thinkingconfig.md - title: ToolConfig path: /docs/reference/js/ai.toolconfig.md + - title: URLContext + path: /docs/reference/js/ai.urlcontext.md + - title: URLContextMetadata + path: /docs/reference/js/ai.urlcontextmetadata.md + - title: URLContextTool + path: /docs/reference/js/ai.urlcontexttool.md + - title: URLMetadata + path: /docs/reference/js/ai.urlmetadata.md - title: UsageMetadata path: /docs/reference/js/ai.usagemetadata.md - title: VertexAIBackend diff --git a/docs-devsite/ai.generatecontentcandidate.md b/docs-devsite/ai.generatecontentcandidate.md index ca0383549a7..1691442ecfa 100644 --- a/docs-devsite/ai.generatecontentcandidate.md +++ b/docs-devsite/ai.generatecontentcandidate.md @@ -29,6 +29,7 @@ export interface 
GenerateContentCandidate | [groundingMetadata](./ai.generatecontentcandidate.md#generatecontentcandidategroundingmetadata) | [GroundingMetadata](./ai.groundingmetadata.md#groundingmetadata_interface) | | | [index](./ai.generatecontentcandidate.md#generatecontentcandidateindex) | number | | | [safetyRatings](./ai.generatecontentcandidate.md#generatecontentcandidatesafetyratings) | [SafetyRating](./ai.safetyrating.md#safetyrating_interface)\[\] | | +| [urlContextMetadata](./ai.generatecontentcandidate.md#generatecontentcandidateurlcontextmetadata) | [URLContextMetadata](./ai.urlcontextmetadata.md#urlcontextmetadata_interface) | | ## GenerateContentCandidate.citationMetadata @@ -85,3 +86,11 @@ index: number; ```typescript safetyRatings?: SafetyRating[]; ``` + +## GenerateContentCandidate.urlContextMetadata + +Signature: + +```typescript +urlContextMetadata?: URLContextMetadata; +``` diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index e4e382256b3..e47d35f3a24 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -134,6 +134,10 @@ The Firebase AI Web SDK. | [TextPart](./ai.textpart.md#textpart_interface) | Content part interface if the part represents a text string. | | [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models.Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer. | | [ToolConfig](./ai.toolconfig.md#toolconfig_interface) | Tool config. This config is shared for all tools provided in the request. | +| [URLContext](./ai.urlcontext.md#urlcontext_interface) | (Public Preview) Specifies the URL Context configuration. | +| [URLContextMetadata](./ai.urlcontextmetadata.md#urlcontextmetadata_interface) | (Public Preview) Metadata related to [URLContextTool](./ai.urlcontexttool.md#urlcontexttool_interface). 
| +| [URLContextTool](./ai.urlcontexttool.md#urlcontexttool_interface) | (Public Preview) A tool that allows you to provide additional context to the models in the form of public web URLs. By including URLs in your request, the Gemini model will access the content from those pages to inform and enhance its response. | +| [URLMetadata](./ai.urlmetadata.md#urlmetadata_interface) | (Public Preview) Metadata for a single URL retrieved by the [URLContextTool](./ai.urlcontexttool.md#urlcontexttool_interface) tool. | | [UsageMetadata](./ai.usagemetadata.md#usagemetadata_interface) | Usage metadata about a [GenerateContentResponse](./ai.generatecontentresponse.md#generatecontentresponse_interface). | | [VideoMetadata](./ai.videometadata.md#videometadata_interface) | Describes the input video content. | | [VoiceConfig](./ai.voiceconfig.md#voiceconfig_interface) | (Public Preview) Configuration for the voice to used in speech synthesis. | @@ -165,6 +169,7 @@ The Firebase AI Web SDK. | [POSSIBLE\_ROLES](./ai.md#possible_roles) | Possible roles. | | [ResponseModality](./ai.md#responsemodality) | (Public Preview) Generation modalities to be returned in generation responses. | | [SchemaType](./ai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) | +| [URLRetrievalStatus](./ai.md#urlretrievalstatus) | (Public Preview) The status of a URL retrieval. | ## Type Aliases @@ -197,6 +202,7 @@ The Firebase AI Web SDK. | [SchemaType](./ai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) | | [Tool](./ai.md#tool) | Defines a tool that model can call to access external knowledge. | | [TypedSchema](./ai.md#typedschema) | A type that includes all specific Schema types. | +| [URLRetrievalStatus](./ai.md#urlretrievalstatus) | (Public Preview) The status of a URL retrieval. 
| ## function(app, ...) @@ -752,6 +758,27 @@ SchemaType: { } ``` +## URLRetrievalStatus + +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +The status of a URL retrieval. + +URL\_RETRIEVAL\_STATUS\_UNSPECIFIED: Unspecified retrieval status.
URL\_RETRIEVAL\_STATUS\_SUCCESS: The URL retrieval was successful.
URL\_RETRIEVAL\_STATUS\_ERROR: The URL retrieval failed.
URL\_RETRIEVAL\_STATUS\_PAYWALL: The URL retrieval failed because the content is behind a paywall.
URL\_RETRIEVAL\_STATUS\_UNSAFE: The URL retrieval failed because the content is unsafe.
+ +Signature: + +```typescript +URLRetrievalStatus: { + URL_RETRIEVAL_STATUS_UNSPECIFIED: string; + URL_RETRIEVAL_STATUS_SUCCESS: string; + URL_RETRIEVAL_STATUS_ERROR: string; + URL_RETRIEVAL_STATUS_PAYWALL: string; + URL_RETRIEVAL_STATUS_UNSAFE: string; +} +``` + ## AIErrorCode Standardized error codes that [AIError](./ai.aierror.md#aierror_class) can have. @@ -1031,7 +1058,7 @@ Defines a tool that model can call to access external knowledge. Signature: ```typescript -export type Tool = FunctionDeclarationsTool | GoogleSearchTool | CodeExecutionTool; +export type Tool = FunctionDeclarationsTool | GoogleSearchTool | CodeExecutionTool | URLContextTool; ``` ## TypedSchema @@ -1043,3 +1070,18 @@ A type that includes all specific Schema types. ```typescript export type TypedSchema = IntegerSchema | NumberSchema | StringSchema | BooleanSchema | ObjectSchema | ArraySchema | AnyOfSchema; ``` + +## URLRetrievalStatus + +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +The status of a URL retrieval. + +URL\_RETRIEVAL\_STATUS\_UNSPECIFIED: Unspecified retrieval status.
URL\_RETRIEVAL\_STATUS\_SUCCESS: The URL retrieval was successful.
URL\_RETRIEVAL\_STATUS\_ERROR: The URL retrieval failed.
URL\_RETRIEVAL\_STATUS\_PAYWALL: The URL retrieval failed because the content is behind a paywall.
URL\_RETRIEVAL\_STATUS\_UNSAFE: The URL retrieval failed because the content is unsafe.
+ +Signature: + +```typescript +export type URLRetrievalStatus = (typeof URLRetrievalStatus)[keyof typeof URLRetrievalStatus]; +``` diff --git a/docs-devsite/ai.urlcontext.md b/docs-devsite/ai.urlcontext.md new file mode 100644 index 00000000000..435d278e4d1 --- /dev/null +++ b/docs-devsite/ai.urlcontext.md @@ -0,0 +1,22 @@ +Project: /docs/reference/js/_project.yaml +Book: /docs/reference/_book.yaml +page_type: reference + +{% comment %} +DO NOT EDIT THIS FILE! +This is generated by the JS SDK team, and any local changes will be +overwritten. Changes should be made in the source code at +https://github.com/firebase/firebase-js-sdk +{% endcomment %} + +# URLContext interface +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +Specifies the URL Context configuration. + +Signature: + +```typescript +export interface URLContext +``` diff --git a/docs-devsite/ai.urlcontextmetadata.md b/docs-devsite/ai.urlcontextmetadata.md new file mode 100644 index 00000000000..bc260b997ad --- /dev/null +++ b/docs-devsite/ai.urlcontextmetadata.md @@ -0,0 +1,41 @@ +Project: /docs/reference/js/_project.yaml +Book: /docs/reference/_book.yaml +page_type: reference + +{% comment %} +DO NOT EDIT THIS FILE! +This is generated by the JS SDK team, and any local changes will be +overwritten. Changes should be made in the source code at +https://github.com/firebase/firebase-js-sdk +{% endcomment %} + +# URLContextMetadata interface +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +Metadata related to [URLContextTool](./ai.urlcontexttool.md#urlcontexttool_interface). 
+ +Signature: + +```typescript +export interface URLContextMetadata +``` + +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [urlMetadata](./ai.urlcontextmetadata.md#urlcontextmetadataurlmetadata) | [URLMetadata](./ai.urlmetadata.md#urlmetadata_interface)\[\] | (Public Preview) List of URL metadata used to provide context to the Gemini model. | + +## URLContextMetadata.urlMetadata + +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +List of URL metadata used to provide context to the Gemini model. + +Signature: + +```typescript +urlMetadata: URLMetadata[]; +``` diff --git a/docs-devsite/ai.urlcontexttool.md b/docs-devsite/ai.urlcontexttool.md new file mode 100644 index 00000000000..6ecc2a323c1 --- /dev/null +++ b/docs-devsite/ai.urlcontexttool.md @@ -0,0 +1,41 @@ +Project: /docs/reference/js/_project.yaml +Book: /docs/reference/_book.yaml +page_type: reference + +{% comment %} +DO NOT EDIT THIS FILE! +This is generated by the JS SDK team, and any local changes will be +overwritten. Changes should be made in the source code at +https://github.com/firebase/firebase-js-sdk +{% endcomment %} + +# URLContextTool interface +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +A tool that allows you to provide additional context to the models in the form of public web URLs. By including URLs in your request, the Gemini model will access the content from those pages to inform and enhance its response. + +Signature: + +```typescript +export interface URLContextTool +``` + +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [urlContext](./ai.urlcontexttool.md#urlcontexttoolurlcontext) | [URLContext](./ai.urlcontext.md#urlcontext_interface) | (Public Preview) Specifies the URL Context configuration. 
| + +## URLContextTool.urlContext + +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +Specifies the URL Context configuration. + +Signature: + +```typescript +urlContext: URLContext; +``` diff --git a/docs-devsite/ai.urlmetadata.md b/docs-devsite/ai.urlmetadata.md new file mode 100644 index 00000000000..3cbd27632d0 --- /dev/null +++ b/docs-devsite/ai.urlmetadata.md @@ -0,0 +1,55 @@ +Project: /docs/reference/js/_project.yaml +Book: /docs/reference/_book.yaml +page_type: reference + +{% comment %} +DO NOT EDIT THIS FILE! +This is generated by the JS SDK team, and any local changes will be +overwritten. Changes should be made in the source code at +https://github.com/firebase/firebase-js-sdk +{% endcomment %} + +# URLMetadata interface +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +Metadata for a single URL retrieved by the [URLContextTool](./ai.urlcontexttool.md#urlcontexttool_interface) tool. + +Signature: + +```typescript +export interface URLMetadata +``` + +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [retrievedUrl](./ai.urlmetadata.md#urlmetadataretrievedurl) | string | (Public Preview) The retrieved URL. | +| [urlRetrievalStatus](./ai.urlmetadata.md#urlmetadataurlretrievalstatus) | [URLRetrievalStatus](./ai.md#urlretrievalstatus) | (Public Preview) The status of the URL retrieval. | + +## URLMetadata.retrievedUrl + +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +The retrieved URL. + +Signature: + +```typescript +retrievedUrl?: string; +``` + +## URLMetadata.urlRetrievalStatus + +> This API is provided as a preview for developers and may change based on feedback that we receive. 
Do not use this API in a production environment. +> + +The status of the URL retrieval. + +Signature: + +```typescript +urlRetrievalStatus?: URLRetrievalStatus; +``` diff --git a/docs-devsite/ai.usagemetadata.md b/docs-devsite/ai.usagemetadata.md index 954fcc6e530..bf45610f4a1 100644 --- a/docs-devsite/ai.usagemetadata.md +++ b/docs-devsite/ai.usagemetadata.md @@ -27,6 +27,8 @@ export interface UsageMetadata | [promptTokenCount](./ai.usagemetadata.md#usagemetadataprompttokencount) | number | | | [promptTokensDetails](./ai.usagemetadata.md#usagemetadataprompttokensdetails) | [ModalityTokenCount](./ai.modalitytokencount.md#modalitytokencount_interface)\[\] | | | [thoughtsTokenCount](./ai.usagemetadata.md#usagemetadatathoughtstokencount) | number | The number of tokens used by the model's internal "thinking" process. | +| [toolUsePromptTokenCount](./ai.usagemetadata.md#usagemetadatatooluseprompttokencount) | number | The number of tokens used by tools. | +| [toolUsePromptTokensDetails](./ai.usagemetadata.md#usagemetadatatooluseprompttokensdetails) | [ModalityTokenCount](./ai.modalitytokencount.md#modalitytokencount_interface)\[\] | A list of tokens used by tools, broken down by modality. | | [totalTokenCount](./ai.usagemetadata.md#usagemetadatatotaltokencount) | number | | ## UsageMetadata.candidatesTokenCount @@ -71,6 +73,26 @@ The number of tokens used by the model's internal "thinking" process. thoughtsTokenCount?: number; ``` +## UsageMetadata.toolUsePromptTokenCount + +The number of tokens used by tools. + +Signature: + +```typescript +toolUsePromptTokenCount?: number; +``` + +## UsageMetadata.toolUsePromptTokensDetails + +A list of tokens used by tools, broken down by modality. 
+ +Signature: + +```typescript +toolUsePromptTokensDetails?: ModalityTokenCount[]; +``` + ## UsageMetadata.totalTokenCount Signature: diff --git a/packages/ai/integration/generate-content.test.ts b/packages/ai/integration/generate-content.test.ts index a827a447d90..ffb1ecca698 100644 --- a/packages/ai/integration/generate-content.test.ts +++ b/packages/ai/integration/generate-content.test.ts @@ -17,6 +17,7 @@ import { expect } from 'chai'; import { + BackendType, Content, GenerationConfig, HarmBlockThreshold, @@ -25,11 +26,13 @@ import { Modality, Outcome, SafetySetting, + URLRetrievalStatus, getGenerativeModel } from '../src'; import { testConfigs, TOKEN_COUNT_DELTA } from './constants'; -describe('Generate Content', () => { +describe('Generate Content', function () { + this.timeout(20_000); testConfigs.forEach(testConfig => { describe(`${testConfig.toString()}`, () => { const commonGenerationConfig: GenerationConfig = { @@ -41,19 +44,19 @@ describe('Generate Content', () => { const commonSafetySettings: SafetySetting[] = [ { category: HarmCategory.HARM_CATEGORY_HARASSMENT, - threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE + threshold: HarmBlockThreshold.BLOCK_NONE }, { category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, - threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE + threshold: HarmBlockThreshold.BLOCK_NONE }, { category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, - threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE + threshold: HarmBlockThreshold.BLOCK_NONE }, { category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, - threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE + threshold: HarmBlockThreshold.BLOCK_NONE } ]; @@ -190,6 +193,132 @@ describe('Generate Content', () => { }); }); + describe('URL Context', () => { + // URL Context is not supported in Google AI for gemini-2.0-flash + if ( + testConfig.ai.backend.backendType === BackendType.GOOGLE_AI && + testConfig.model === 'gemini-2.0-flash' + ) { + return; + } + + it('generateContent: url 
context', async () => { + const model = getGenerativeModel(testConfig.ai, { + model: testConfig.model, + generationConfig: commonGenerationConfig, + safetySettings: commonSafetySettings, + tools: [{ urlContext: {} }] + }); + + const result = await model.generateContent( + 'Summarize this website https://berkshirehathaway.com' + ); + const response = result.response; + const urlContextMetadata = + response.candidates?.[0].urlContextMetadata; + expect(urlContextMetadata?.urlMetadata).to.exist; + expect( + urlContextMetadata?.urlMetadata.length + ).to.be.greaterThanOrEqual(1); + expect(urlContextMetadata?.urlMetadata[0].retrievedUrl).to.exist; + expect(urlContextMetadata?.urlMetadata[0].retrievedUrl).to.equal( + 'https://berkshirehathaway.com' + ); + expect( + urlContextMetadata?.urlMetadata[0].urlRetrievalStatus + ).to.equal(URLRetrievalStatus.URL_RETRIEVAL_STATUS_SUCCESS); + + const usageMetadata = response.usageMetadata; + expect(usageMetadata).to.exist; + expect(usageMetadata?.toolUsePromptTokenCount).to.exist; + expect(usageMetadata?.toolUsePromptTokenCount).to.be.greaterThan(0); + }); + + it('generateContent: url context and google search grounding', async () => { + const model = getGenerativeModel(testConfig.ai, { + model: testConfig.model, + generationConfig: commonGenerationConfig, + safetySettings: commonSafetySettings, + tools: [{ urlContext: {} }, { googleSearch: {} }] + }); + + const result = await model.generateContent( + 'According to https://info.cern.ch/hypertext/WWW/TheProject.html, what is the WorldWideWeb? Search the web for other definitions.' 
+ ); + const response = result.response; + const trimmedText = response.text().trim(); + const urlContextMetadata = + response.candidates?.[0].urlContextMetadata; + const groundingMetadata = response.candidates?.[0].groundingMetadata; + expect(trimmedText).to.contain( + 'hypermedia information retrieval initiative' + ); + expect(urlContextMetadata?.urlMetadata).to.exist; + expect( + urlContextMetadata?.urlMetadata.length + ).to.be.greaterThanOrEqual(1); + expect(urlContextMetadata?.urlMetadata[0].retrievedUrl).to.exist; + expect(urlContextMetadata?.urlMetadata[0].retrievedUrl).to.equal( + 'https://info.cern.ch/hypertext/WWW/TheProject.html' + ); + expect( + urlContextMetadata?.urlMetadata[0].urlRetrievalStatus + ).to.equal(URLRetrievalStatus.URL_RETRIEVAL_STATUS_SUCCESS); + expect(groundingMetadata).to.exist; + expect(groundingMetadata?.groundingChunks).to.exist; + expect( + groundingMetadata?.groundingChunks!.length + ).to.be.greaterThanOrEqual(1); + expect( + groundingMetadata?.groundingSupports!.length + ).to.be.greaterThanOrEqual(1); + + const usageMetadata = response.usageMetadata; + expect(usageMetadata).to.exist; + expect(usageMetadata?.toolUsePromptTokenCount).to.exist; + expect(usageMetadata?.toolUsePromptTokenCount).to.be.greaterThan(0); + }); + + it('generateContent: url context and google search grounding without URLs in prompt', async () => { + const model = getGenerativeModel(testConfig.ai, { + model: testConfig.model, + generationConfig: commonGenerationConfig, + safetySettings: commonSafetySettings, + tools: [{ urlContext: {} }, { googleSearch: {} }] + }); + + const result = await model.generateContent( + 'Recommend 3 books for beginners to read to learn more about the latest advancements in Quantum Computing.' 
+ ); + const response = result.response; + const urlContextMetadata = + response.candidates?.[0].urlContextMetadata; + const groundingMetadata = response.candidates?.[0].groundingMetadata; + if (testConfig.ai.backend.backendType === BackendType.GOOGLE_AI) { + expect(urlContextMetadata?.urlMetadata).to.exist; + expect( + urlContextMetadata?.urlMetadata.length + ).to.be.greaterThanOrEqual(1); + expect(urlContextMetadata?.urlMetadata[0].retrievedUrl).to.exist; + expect( + urlContextMetadata?.urlMetadata[0].urlRetrievalStatus + ).to.equal(URLRetrievalStatus.URL_RETRIEVAL_STATUS_SUCCESS); + expect(groundingMetadata).to.exist; + expect(groundingMetadata?.groundingChunks).to.exist; + + const usageMetadata = response.usageMetadata; + expect(usageMetadata).to.exist; + expect(usageMetadata?.toolUsePromptTokenCount).to.exist; + expect(usageMetadata?.toolUsePromptTokenCount).to.be.greaterThan(0); + } else { + // URL Context does not integrate with Google Search Grounding in Vertex AI + expect(urlContextMetadata?.urlMetadata).to.not.exist; + expect(groundingMetadata).to.exist; + expect(groundingMetadata?.groundingChunks).to.exist; + } + }); + }); + it('generateContent: code execution', async () => { const model = getGenerativeModel(testConfig.ai, { model: testConfig.model, diff --git a/packages/ai/src/googleai-mappers.ts b/packages/ai/src/googleai-mappers.ts index 23c238c1e3b..b3e4206f33c 100644 --- a/packages/ai/src/googleai-mappers.ts +++ b/packages/ai/src/googleai-mappers.ts @@ -193,7 +193,8 @@ export function mapGenerateContentCandidates( finishMessage: candidate.finishMessage, safetyRatings: mappedSafetyRatings, citationMetadata, - groundingMetadata: candidate.groundingMetadata + groundingMetadata: candidate.groundingMetadata, + urlContextMetadata: candidate.urlContextMetadata }; mappedCandidates.push(mappedCandidate); }); diff --git a/packages/ai/src/methods/generate-content.test.ts b/packages/ai/src/methods/generate-content.test.ts index 3bb396ac6d8..f75c87e4dd9 100644 
--- a/packages/ai/src/methods/generate-content.test.ts +++ b/packages/ai/src/methods/generate-content.test.ts @@ -249,6 +249,58 @@ describe('generateContent()', () => { false, match.any ); + }); + + it('url context', async () => { + // NOTE(review): no urlContextMetadata assertions yet — TODO confirm. + const mockResponse = getMockResponse( + 'vertexAI', + 'unary-success-url-context.json' + ); + const makeRequestStub = stub(request, 'makeRequest').resolves( + mockResponse as Response + ); + const result = await generateContent( + fakeApiSettings, + 'model', + fakeRequestParams + ); + expect(result.response.text()).to.include( + 'The temperature is 67°F (19°C)' + ); + const groundingMetadata = + result.response.candidates?.[0].groundingMetadata; + expect(groundingMetadata).to.not.be.undefined; + expect(groundingMetadata!.searchEntryPoint?.renderedContent).to.contain( + 'div' + ); + expect(groundingMetadata!.groundingChunks?.length).to.equal(2); + expect(groundingMetadata!.groundingChunks?.[0].web?.uri).to.contain( + 'https://vertexaisearch.cloud.google.com' + ); + expect(groundingMetadata!.groundingChunks?.[0].web?.title).to.equal( + 'accuweather.com' + ); + expect(groundingMetadata!.groundingSupports?.length).to.equal(3); + expect( + groundingMetadata!.groundingSupports?.[0].groundingChunkIndices + ).to.deep.equal([0]); + expect(groundingMetadata!.groundingSupports?.[0].segment).to.deep.equal({ + endIndex: 56, + text: 'The current weather in London, United Kingdom is cloudy.' 
+ }); + expect(groundingMetadata!.groundingSupports?.[0].segment?.partIndex).to.be + .undefined; + expect(groundingMetadata!.groundingSupports?.[0].segment?.startIndex).to + .be.undefined; + + expect(makeRequestStub).to.be.calledWith( + 'model', + Task.GENERATE_CONTENT, + fakeApiSettings, + false, + match.any + ); }); it('codeExecution', async () => { const mockResponse = getMockResponse( diff --git a/packages/ai/src/requests/stream-reader.ts b/packages/ai/src/requests/stream-reader.ts index c3a35b1da4a..3b205efabf2 100644 --- a/packages/ai/src/requests/stream-reader.ts +++ b/packages/ai/src/requests/stream-reader.ts @@ -28,7 +28,7 @@ import { createEnhancedContentResponse } from './response-helpers'; import * as GoogleAIMapper from '../googleai-mappers'; import { GoogleAIGenerateContentResponse } from '../types/googleai'; import { ApiSettings } from '../types/internal'; -import { BackendType } from '../public-types'; +import { BackendType, URLContextMetadata } from '../public-types'; const responseLineRE = /^data\: (.*)(?:\n\n|\r\r|\r\n\r\n)/; @@ -193,6 +193,20 @@ export function aggregateResponses( aggregatedResponse.candidates[i].groundingMetadata = candidate.groundingMetadata; + // The urlContextMetadata object is defined in the first chunk of the response stream. + // In all subsequent chunks, the urlContextMetadata object will be undefined. We need to + // make sure that we don't overwrite the first value urlContextMetadata object with undefined. + // FIXME: What happens if we receive a second, valid urlContextMetadata object? + const urlContextMetadata = candidate.urlContextMetadata as unknown; + if ( + typeof urlContextMetadata === 'object' && + urlContextMetadata !== null && + Object.keys(urlContextMetadata).length > 0 + ) { + aggregatedResponse.candidates[i].urlContextMetadata = + urlContextMetadata as URLContextMetadata; + } + /** * Candidates should always have content and parts, but this handles * possible malformed responses. 
diff --git a/packages/ai/src/types/googleai.ts b/packages/ai/src/types/googleai.ts index 38c27b3fe8b..eb282b094fc 100644 --- a/packages/ai/src/types/googleai.ts +++ b/packages/ai/src/types/googleai.ts @@ -23,7 +23,8 @@ import { GroundingMetadata, PromptFeedback, SafetyRating, - UsageMetadata + UsageMetadata, + URLContextMetadata } from '../public-types'; import { Content, Part } from './content'; @@ -60,6 +61,7 @@ export interface GoogleAIGenerateContentCandidate { safetyRatings?: SafetyRating[]; citationMetadata?: GoogleAICitationMetadata; groundingMetadata?: GroundingMetadata; + urlContextMetadata?: URLContextMetadata; } /** diff --git a/packages/ai/src/types/requests.ts b/packages/ai/src/types/requests.ts index 0ce87d0c8da..143d772a506 100644 --- a/packages/ai/src/types/requests.ts +++ b/packages/ai/src/types/requests.ts @@ -242,7 +242,8 @@ export interface RequestOptions { export type Tool = | FunctionDeclarationsTool | GoogleSearchTool - | CodeExecutionTool; + | CodeExecutionTool + | URLContextTool; /** * Structured representation of a function declaration as defined by the @@ -319,6 +320,27 @@ export interface CodeExecutionTool { */ export interface GoogleSearch {} +/** + * A tool that allows you to provide additional context to the models in the form of public web + * URLs. By including URLs in your request, the Gemini model will access the content from those + * pages to inform and enhance its response. + * + * @beta + */ +export interface URLContextTool { + /** + * Specifies the URL Context configuration. + */ + urlContext: URLContext; +} + +/** + * Specifies the URL Context configuration. 
+ * + * @beta + */ +export interface URLContext {} + /** * A `FunctionDeclarationsTool` is a piece of code that enables the system to * interact with external systems to perform an action, or set of actions, diff --git a/packages/ai/src/types/responses.ts b/packages/ai/src/types/responses.ts index 4a01e79a77c..8b8e1351675 100644 --- a/packages/ai/src/types/responses.ts +++ b/packages/ai/src/types/responses.ts @@ -116,8 +116,16 @@ export interface UsageMetadata { */ thoughtsTokenCount?: number; totalTokenCount: number; + /** + * The number of tokens used by tools. + */ + toolUsePromptTokenCount?: number; promptTokensDetails?: ModalityTokenCount[]; candidatesTokensDetails?: ModalityTokenCount[]; + /** + * A list of tokens used by tools, broken down by modality. + */ + toolUsePromptTokensDetails?: ModalityTokenCount[]; } /** @@ -160,6 +168,7 @@ export interface GenerateContentCandidate { safetyRatings?: SafetyRating[]; citationMetadata?: CitationMetadata; groundingMetadata?: GroundingMetadata; + urlContextMetadata?: URLContextMetadata; } /** @@ -349,6 +358,94 @@ export interface Segment { text: string; } +/** + * Metadata related to {@link URLContextTool}. + * + * @beta + */ +export interface URLContextMetadata { + /** + * List of URL metadata used to provide context to the Gemini model. + */ + urlMetadata: URLMetadata[]; +} + +/** + * Metadata for a single URL retrieved by the {@link URLContextTool} tool. + * + * @beta + */ +export interface URLMetadata { + /** + * The retrieved URL. + */ + retrievedUrl?: string; + /** + * The status of the URL retrieval. + */ + urlRetrievalStatus?: URLRetrievalStatus; +} + +/** + * The status of a URL retrieval. + * + * @remarks + * URL_RETRIEVAL_STATUS_UNSPECIFIED: Unspecified retrieval status. + *
+ * URL_RETRIEVAL_STATUS_SUCCESS: The URL retrieval was successful. + *
+ * URL_RETRIEVAL_STATUS_ERROR: The URL retrieval failed. + *
+ * URL_RETRIEVAL_STATUS_PAYWALL: The URL retrieval failed because the content is behind a paywall. + *
+ * URL_RETRIEVAL_STATUS_UNSAFE: The URL retrieval failed because the content is unsafe. + *
+ * + * @beta + */ +export const URLRetrievalStatus = { + /** + * Unspecified retrieval status. + */ + URL_RETRIEVAL_STATUS_UNSPECIFIED: 'URL_RETRIEVAL_STATUS_UNSPECIFIED', + /** + * The URL retrieval was successful. + */ + URL_RETRIEVAL_STATUS_SUCCESS: 'URL_RETRIEVAL_STATUS_SUCCESS', + /** + * The URL retrieval failed. + */ + URL_RETRIEVAL_STATUS_ERROR: 'URL_RETRIEVAL_STATUS_ERROR', + /** + * The URL retrieval failed because the content is behind a paywall. + */ + URL_RETRIEVAL_STATUS_PAYWALL: 'URL_RETRIEVAL_STATUS_PAYWALL', + /** + * The URL retrieval failed because the content is unsafe. + */ + URL_RETRIEVAL_STATUS_UNSAFE: 'URL_RETRIEVAL_STATUS_UNSAFE' +}; + +/** + * The status of a URL retrieval. + * + * @remarks + * URL_RETRIEVAL_STATUS_UNSPECIFIED: Unspecified retrieval status. + *
+ * URL_RETRIEVAL_STATUS_SUCCESS: The URL retrieval was successful. + *
+ * URL_RETRIEVAL_STATUS_ERROR: The URL retrieval failed. + *
+ * URL_RETRIEVAL_STATUS_PAYWALL: The URL retrieval failed because the content is behind a paywall. + *
+ * URL_RETRIEVAL_STATUS_UNSAFE: The URL retrieval failed because the content is unsafe. + *
+ * + * @beta + */ +export type URLRetrievalStatus = + (typeof URLRetrievalStatus)[keyof typeof URLRetrievalStatus]; + /** * @public */