Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ export LANGCHAIN_API_KEY=ls-...

## Repo Structure

The package is located within [langchain_benchmarks](./langchain_benchmarks/). Check out the [docs](https://langchain-ai.github.io/langchain-benchmarks/index.html) for information on how to get starte.
The package is located within [langchain_benchmarks](./langchain_benchmarks/). Check out the [docs](https://langchain-ai.github.io/langchain-benchmarks/index.html) for information on how to get started.

The other directories are legacy and may be moved in the future.

Expand Down
113 changes: 65 additions & 48 deletions docs/source/notebooks/datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "58b94f6d-0c91-4361-9b22-f758ffaa150a",
"metadata": {
"tags": []
Expand Down Expand Up @@ -79,7 +79,7 @@
],
"source": [
"download_public_dataset(\n",
" \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\"\n",
" \"https://smith.langchain.com/public/59577193-8938-4ccf-92a7-e8a96bcf4f86/examples\"\n",
")"
]
},
Expand All @@ -93,7 +93,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "664e90fc-af84-4c5f-a3dd-5d9ffe649650",
"metadata": {
"tags": []
Expand All @@ -103,56 +103,73 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[\n",
" {\n",
" \"created_at\": \"2023-11-15T15:26:53.511629\",\n",
" \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
" \"id\": \"0703a989-2693-4039-a1f6-7281fc1b4cb0\",\n",
" \"inputs\": {\n",
" \"question\": \"do bob and alice live in the same city?\"\n",
" },\n",
" \"modified_at\": \"2023-11-15T15:26:53.511629\",\n",
" \"outputs\": {\n",
" \"expected_steps\": [\n",
" \"find_users_by_name\",\n",
" \"get_user_location\",\n",
" \"get_city_for_location\",\n",
" \"get_user_location\",\n",
" \"get_city_for_location\"\n",
" ],\n",
" \"order_matters\": false,\n",
" \"reference\": \"no\"\n",
" },\n",
" \"runs\": []\n",
" },\n",
" {\n",
" \"created_at\": \"2023-11-15T15:26:53.491359\",\n",
" \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
" \"id\": \"b258b95a-9524-4da7-b758-c5481109322d\",\n",
" \"inputs\": {\n",
" \"question\": \"Is it likely that Donna is outside with an umbrella at this time?\"\n",
" },\n",
" \"modified_at\": \"2023-11-15T15:26:53.491359\",\n",
" \"outputs\": {\n",
" \"expected_steps\": [\n",
" \"find_users_by_name\",\n",
" \"get_user_location\",\n",
" \"get_current_time_for_location\",\n",
" \"get_current_weather_for_location\"\n",
" ],\n",
" \"order_matters\": false,\n",
" \"reference\": \"yes\"\n",
" },\n",
" \"runs\": []\n",
" }\n",
"]\n"
]
"[\n",
" {\n",
" \"created_at\": \"2023-11-21T19:34:17.103178+00:00\",\n",
" \"dataset_id\": \"82ca6840-cf23-4bb0-a9be-55237ebbe9d3\",\n",
" \"id\": \"c17e9d5a-b9f8-43dc-b5a9-6e45d21c9a2a\",\n",
" \"inputs\": {\n",
" \"question\": \"communication\"\n",
" },\n",
" \"metadata\": null,\n",
" \"modified_at\": \"2023-11-21T19:34:17.103178+00:00\",\n",
" \"outputs\": {\n",
" \"expected_steps\": [\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\"\n",
" ],\n",
" \"order_matters\": false,\n",
" \"reference\": \"communication\"\n",
" },\n",
" \"runs\": []\n",
" },\n",
" {\n",
" \"created_at\": \"2023-11-21T19:34:17.007329+00:00\",\n",
" \"dataset_id\": \"82ca6840-cf23-4bb0-a9be-55237ebbe9d3\",\n",
" \"id\": \"57e29316-e258-4ed9-bbeb-b23c8bcb4bd2\",\n",
" \"inputs\": {\n",
" \"question\": \"information\"\n",
" },\n",
" \"metadata\": null,\n",
" \"modified_at\": \"2023-11-21T19:34:17.007329+00:00\",\n",
" \"outputs\": {\n",
" \"expected_steps\": [\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\",\n",
" \"type_letter\"\n",
" ],\n",
" \"order_matters\": false,\n",
" \"reference\": \"information\"\n",
" },\n",
" \"runs\": []\n",
" }\n",
"]\n"
]
}
],
"source": [
"import json\n",
"\n",
"with open(\"./e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5.json\", \"r\", encoding=\"utf-8\") as f:\n",
"with open(\"./59577193-8938-4ccf-92a7-e8a96bcf4f86.json\", \"r\", encoding=\"utf-8\") as f:\n",
" print(json.dumps(json.load(f)[:2], indent=2, sort_keys=True))"
]
},
Expand Down
6 changes: 5 additions & 1 deletion docs/source/notebooks/tool_usage/intro.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"{\n",
" \"output\": \"It's super sunny. Like 75F\", // the output from the agent\n",
" \"intermediate_steps\": [... \"find_locations_by_name\" ...], // list of the intermediate steps taken by the agent (see format in LangChain)\n",
" \"state\": .., // Can be anything, this is the state fo the environment after the agent has taken all of its actions (optional key)\n",
" \"state\": .., // Can be anything, this is the state of the environment after the agent has taken all of its actions (optional key)\n",
"}\n",
"```"
]
Expand Down Expand Up @@ -222,6 +222,10 @@
"\n",
"---------\n",
"```python\n",
"import dataclasses\n",
"from typing import Any, Callable, List, Optional\n",
"\n",
"from langchain.tools import BaseTool\n",
"\n",
"@dataclasses.dataclass(frozen=True)\n",
"class ToolUsageEnvironment:\n",
Expand Down
2 changes: 1 addition & 1 deletion langchain_benchmarks/tool_usage/tasks/type_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _read_state() -> Any:
The objective of this task is to evaluate the ability of the model to use the provided \
tools to repeat a given input string.

For example, if the string is 'abc', the tools 'a', 'b', and 'c' must be invoked \
For example, if the string is 'abc', the tool must be invoked with the arguments 'a', 'b', and 'c' \
in that order.

The dataset includes examples of varying difficulty. The difficulty is measured \
Expand Down