diff --git a/config/_default/menus/main.en.yaml b/config/_default/menus/main.en.yaml index f28109b65b7e2..6aecd2418714e 100644 --- a/config/_default/menus/main.en.yaml +++ b/config/_default/menus/main.en.yaml @@ -1483,11 +1483,6 @@ menu: identifier: bits_ai_sre_investigate_alerts parent: bits_ai_sre weight: 100 - - name: Coordinate incidents - url: bits_ai/bits_ai_sre/coordinate_incidents - identifier: bits_ai_sre_coordinate_incidents - parent: bits_ai_sre - weight: 101 - name: Bits AI Dev Agent url: bits_ai/bits_ai_dev_agent parent: bits_ai @@ -2480,46 +2475,51 @@ menu: parent: incident_investigate identifier: incident_investigate_follow_ups weight: 502 + - name: Incident AI + url: service_management/incident_management/incident_ai + identifier: incident_ai + parent: incidents + weight: 6 - name: Incident Settings url: service_management/incident_management/incident_settings parent: incidents identifier: incidents_settings - weight: 6 + weight: 7 - name: Information url: service_management/incident_management/incident_settings/information parent: incidents_settings identifier: incidents_settings_information - weight: 601 + weight: 701 - name: Property Fields url: service_management/incident_management/incident_settings/property_fields parent: incidents_settings identifier: incidents_settings_property_fields - weight: 602 + weight: 702 - name: Responder Types url: service_management/incident_management/incident_settings/responder_types parent: incidents_settings identifier: incidents_settings_responder_types - weight: 603 + weight: 703 - name: Integrations url: service_management/incident_management/incident_settings/integrations parent: incidents_settings identifier: incidents_settings_integrations - weight: 604 + weight: 704 - name: Notification Rules url: service_management/incident_management/incident_settings/notification_rules parent: incidents_settings identifier: incidents_settings_notification_rules - weight: 605 + weight: 705 - name: Templates url: 
service_management/incident_management/incident_settings/templates parent: incidents_settings identifier: incidents_settings_templates - weight: 606 + weight: 706 - name: Incident Analytics url: service_management/incident_management/analytics parent: incidents identifier: analytics - weight: 7 + weight: 8 - name: Integrations url: service_management/incident_management/integrations parent: incidents diff --git a/config/_default/params.yaml b/config/_default/params.yaml index 5dc983c967be2..4abcd8747187d 100644 --- a/config/_default/params.yaml +++ b/config/_default/params.yaml @@ -301,6 +301,7 @@ unsupported_sites: fleet_automation: [gov] high_availability_datadog_agent: [gov] iac_security: [gov] + incident_ai: [gov] internal_developer_portal: [gov] live_debugger: [gov] llm_observability: [gov] diff --git a/content/en/bits_ai/_index.md b/content/en/bits_ai/_index.md index db2ca8a579780..4a50d0dc99ee4 100644 --- a/content/en/bits_ai/_index.md +++ b/content/en/bits_ai/_index.md @@ -25,7 +25,7 @@ Bits AI is your agentic teammate in Datadog, built to automate development, secu ## Features {{< whatsnext desc="Learn about how you can use Bits AI:" >}} - {{< nextlink href="bits_ai/bits_ai_sre" >}}Investigate alerts and coordinate incidents proactively with Bits AI SRE{{< /nextlink >}} + {{< nextlink href="bits_ai/bits_ai_sre" >}}Investigate alerts with Bits AI SRE{{< /nextlink >}} {{< nextlink href="bits_ai/bits_ai_dev_agent" >}}Automate code fixes with Bits AI Dev Agent{{< /nextlink >}} {{< nextlink href="actions/action_interface" >}}Take action on your systems with Action Interface{{< /nextlink >}} @@ -36,6 +36,3 @@ Bits AI is your agentic teammate in Datadog, built to automate development, secu ## Further reading {{< partial name="whats-next/whats-next.html" >}} - -[3]: /service_management/incident_management -[4]: /bits_ai/bits_ai_sre/coordinate_incidents/ \ No newline at end of file diff --git a/content/en/bits_ai/bits_ai_sre/_index.md 
b/content/en/bits_ai/bits_ai_sre/_index.md index a782076cc4b7b..9e4347a9de99d 100644 --- a/content/en/bits_ai/bits_ai_sre/_index.md +++ b/content/en/bits_ai/bits_ai_sre/_index.md @@ -1,17 +1,20 @@ --- title: Bits AI SRE -description: "Learn how Bits AI SRE autonomously investigates alerts and coordinates incident response to improve on-call operations." +description: "Learn how Bits AI SRE autonomously investigates alerts to improve on-call operations." further_reading: - link: "https://www.datadoghq.com/blog/bits-ai-sre/" tag: "Blog" text: "Introducing Bits AI SRE, your AI on-call teammate" + - link: "/service_management/incident_management/incident_ai" + tag: "Documentation" + text: "Coordinate incidents with Incident AI" cascade: site_support_id: bits_ai_sre --- ## Overview -Bits AI SRE is an autonomous AI agent that investigates alerts and coordinates incident response. When a monitor triggers, Bits proactively generates multiple hypotheses, queries relevant telemetry, and reasons over the data to help on-call engineers quickly identify the root cause. If the alert escalates to an incident, Bits supports the response by managing stakeholder communications, surfacing relevant knowledge base content, highlighting related incidents, and accelerating the postmortem and incident follow-up process. By reducing manual effort, Bits ensures smoother and more efficient on-call operations. +Bits AI SRE is an autonomous AI agent that investigates alerts. When a monitor triggers, Bits AI proactively generates multiple hypotheses, queries relevant telemetry, and reasons over the data to help on-call engineers quickly identify the root cause. 
{{< img src="bits_ai/overview.png" alt="Bits AI analysis on a monitor alert" style="width:100%;" >}} @@ -19,9 +22,8 @@ Bits AI SRE is an autonomous AI agent that investigates alerts and coordinates i {{< whatsnext desc="Learn about how you can use Bits AI SRE:" >}} {{< nextlink href="bits_ai/bits_ai_sre/investigate_alerts" >}}Investigate alerts{{< /nextlink >}} - {{< nextlink href="bits_ai/bits_ai_sre/coordinate_incidents" >}}Coordinate incidents{{< /nextlink >}} {{< /whatsnext >}} ## Further reading -{{< partial name="whats-next/whats-next.html" >}} \ No newline at end of file +{{< partial name="whats-next/whats-next.html" >}} diff --git a/content/en/bits_ai/chat_with_bits_ai.md b/content/en/bits_ai/chat_with_bits_ai.md index 88f47a30cc93e..1bfb4e37b0846 100644 --- a/content/en/bits_ai/chat_with_bits_ai.md +++ b/content/en/bits_ai/chat_with_bits_ai.md @@ -5,9 +5,9 @@ further_reading: - link: "bits_ai/" tag: "Documentation" text: "Bits AI Overview" -- link: "bits_ai/bits_ai_sre/coordinate_incidents/" +- link: "/service_management/incident_management/incident_ai" tag: "Documentation" - text: "Coordinate Incidents" + text: "Coordinate incidents with Incident AI" aliases: - /bits_ai/getting_started/ --- diff --git a/content/en/bits_ai/bits_ai_sre/coordinate_incidents.md b/content/en/service_management/incident_management/incident_ai.md similarity index 59% rename from content/en/bits_ai/bits_ai_sre/coordinate_incidents.md rename to content/en/service_management/incident_management/incident_ai.md index c283aefd8c77b..57d996c6c18d6 100644 --- a/content/en/bits_ai/bits_ai_sre/coordinate_incidents.md +++ b/content/en/service_management/incident_management/incident_ai.md @@ -1,39 +1,47 @@ --- -title: Coordinate Incidents -description: "Use Bits AI SRE to streamline incident response workflow with proactive updates, stakeholder notifications, and AI-assisted postmortems." 
-further_reading: -- link: "https://www.datadoghq.com/blog/bits-ai-for-incident-management/" - tag: "Blog" - text: "Stay up to date on the latest incidents with Bits AI" +title: Incident AI +description: "Learn how to use Incident AI to automate incident coordination, generate summaries, send notifications, and create AI-assisted postmortems." aliases: - /bits_ai/managing_incidents/ +- /bits_ai/bits_ai_sre/coordinate_incidents +further_reading: +- link: "/bits_ai/bits_ai_sre/" + tag: "Documentation" + text: "Learn about Bits AI SRE" --- -In the heat of an incident, Bits AI SRE handles general incident coordination so you can focus on resolution. From proactive updates to AI-assisted postmortems, Bits streamlines your incident response workflow in Slack and Datadog. +## Overview -## Get started with incident coordination +Incident AI transforms how your team manages incidents by automating coordination tasks and providing intelligent insights throughout the incident lifecycle. Built into Datadog Incident Management, it works in Slack and the Datadog platform to help you respond faster and learn from every incident. -Bits AI SRE helps coordinate incidents—especially those involving multiple teams—by suggesting next steps throughout the incident lifecycle. This streamlines communication and improves overall process management. +Key capabilities include: +- **Incident summaries**: Get context-aware summaries when you join incident channels. +- **Related incident detection**: Automatically detect related incidents to identify systemic and recurring issues. +- **Request information or take action**: Declare incidents, update severity and status, search incident history, and more—all through conversational prompts in Slack. +- **AI-enhanced notifications**: Dynamically populate stakeholder updates with AI-generated summaries of contributing factors, impact, and remediation across email, MS Teams, Slack, and other channels. 
+- **Automated follow-ups**: Incident AI collects action items mentioned during incidents and suggests them as follow-up tasks when the incident is resolved. +- **Intelligent postmortems**: Generate comprehensive first drafts with AI-powered sections covering executive summaries, timelines, customer impact, and lessons learned—giving responders a strong foundation to build on. + +## Get started with incident coordination -
These features require Datadog Incident Management.
+Incident AI helps coordinate incidents—especially those involving multiple teams—by suggesting next steps throughout the incident lifecycle. This streamlines communication and improves overall process management. 1. Connect Datadog to Slack. - 1. In Slack, run the `/dd connect` command. - 1. Follow the on-screen prompts to complete the connection process. + 1. In any Slack channel, run the `/dd connect` command. + 1. Follow the on-screen prompts to complete the connection process. 1. Enable the Slack integration in Datadog Incident Management. 1. In the [Integrations][4] section of the Incidents settings page, find the **Slack** settings. 1. Enable the following toggles: - **Push Slack channel messages to the incident timeline** - - **Activate Bits AI features in incident Slack channels for your organization**
- **Note**: Bits AI's incident management features can only be activated for one Datadog organization within a single Slack workspace. - {{< img src="bits_ai/coordinate_incidents_slack_settings.png" alt="Slack integration settings with the specified toggles enabled" style="width:100%;" >}} -1. To interact with Bits AI in a Slack channel, invite it by running the `@Datadog` command. + - **Activate Incident AI features in incident Slack channels for your organization**
+ **Note**: Incident AI's incident management features can only be activated for one Datadog organization within a single Slack workspace. +1. To interact with Incident AI in a Slack channel, invite it by running the `@Datadog` command. -## Customize stakeholder notifications +## Customize stakeholder notifications -Bits can dynamically populate key details in stakeholder notifications—delivering clearer, faster updates across the tools your team already uses. Notification rules support delivery to a wide variety of destinations, including email, Datadog On-Call, MS Teams, Slack, and more, ensuring AI-enhanced updates reach the right people—on the right platform—at the right time. +Incident AI can dynamically populate key details in stakeholder notifications, delivering clearer, faster updates across the tools your team already uses. Notification rules support delivery to a wide variety of destinations, including email, Datadog On-Call, MS Teams, Slack, and more, ensuring AI-enhanced updates reach the right people, on the right platform, at the right time. -1. In your Incidents settings, go to [Message Templates][1]. +1. In your Incidents settings, go to [Notification Templates][1]. 1. Create a new template or edit an existing one. 1. In the message body, insert any of the following AI variables: @@ -62,7 +70,7 @@ Bits can dynamically populate key details in stakeholder notifications—deliver
- {{< img src="bits_ai/message_template_variables.png" alt="New message template with AI variables in it" style="width:100%;" >}} + {{< img src="service_management/incidents/incident_ai/message_template_variables.png" alt="New message template with AI variables in it" style="width:100%;" >}} 1. Click **Save** to save the template. 1. Go to your incident [Notification Rules][2]. 1. Click **New Rule**. @@ -71,19 +79,27 @@ Bits can dynamically populate key details in stakeholder notifications—deliver ## Proactive incident summaries -When you join an incident channel in Slack (connected to Datadog Incident Management), Bits automatically posts a summary containing key information about the incident such as the contributing factors, impact, issue, and remediation. This summary is only visible to you. +When you join an incident channel in Slack (connected to Datadog Incident Management), Incident AI automatically posts a summary containing key information about the incident such as the contributing factors, impact, issue, and remediation. This summary is only visible to you. -When an incident is changed to resolved, Bits posts a final summary. This is visible to everyone in the channel. +When an incident is changed to resolved, Incident AI posts a final summary. This is visible to everyone in the channel. -{{< img src="bits_ai/incident_summary.png" alt="Example incident summary in Slack" style="width:100%;" >}} +{{< img src="service_management/incidents/incident_ai/incident_summary.png" alt="Example incident summary in Slack" style="width:100%;" >}} + +## Proactive follow-up task suggestion + +After an incident is resolved, Incident AI collects any follow-up tasks responders mentioned during the incident. It then prompts you to review and create them with a single click. These tasks are saved as Incident Follow-ups in Datadog Incident Management. For more information, see [Incident Follow-ups][5]. + +To view suggested follow-up tasks: +1. 
Navigate to the relevant incident in Datadog. +1. Open the **Remediation** tab to view a list of all follow-up tasks you've saved from Slack. ## Related incident detection -Bits automatically flags related incidents if they are declared within 20 minutes of each other, helping you identify broader systemic issues. +Incident AI automatically flags related incidents if they are declared within 20 minutes of each other, helping you identify broader systemic issues. -## Chat with Bits AI SRE about incidents +## Chat with Incident AI -Use natural language prompts to request for information or take action from Slack: +Use natural language prompts to request information or take action from Slack: | Functionality | Example prompt | |------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------| @@ -92,18 +108,10 @@ Use natural language prompts to request for information or take action from Slac | Change status | `@Datadog Mark this incident as stable`
`@Datadog Resolve this incident` | | Request new summary | `@Datadog Give me a summary of this incident`
`@Datadog Summarize incident-262`
**Note**: Private incidents are not summarized. | | Search incident history | `@Datadog How many incidents are currently ongoing?`
`@Datadog Show me all Sev-1 incidents that occurred in the past week.` | -| Dive into specific incidents | `@Datadog What was the root cause of incident-123?` Or
`@Datadog What remediation actions did the responders take in incident-123?` | -| Find related incidents | `@Datadog Are there any related incidents?`
`@Datadog Find me incidents related to DDOS attacks from the past month` | +| Dive into specific incidents | `@Datadog What was the root cause of incident-123?`
`@Datadog What remediation actions did the responders take in incident-123?` | +| Find related incidents | `@Datadog Are there any related incidents?`
`@Datadog Find me incidents related to DDoS attacks from the past month` | | Early detection inquiry | `@Datadog A customer is unable to check out. Is there an incident?`
`@Datadog Are there any incidents now impacting the payments service?` | -## Proactive follow-up task suggestion - -After an incident is resolved, Bits collects any follow-up tasks responders mentioned during the incident. It then prompts you to review and create them with a single click. These tasks are saved as Incident Follow-Ups in Datadog Incident Management. - -To view suggested follow-up tasks: -1. Navigate to the relevant incident in Datadog. -1. Open the **Remediation** tab to view a list of all follow-up tasks you've saved from Slack. - ## Customize postmortem templates with AI incident variables 1. In Datadog, navigate to your incident [Postmortem Templates][3]. @@ -138,3 +146,4 @@ To generate an AI-assisted postmortem draft: [2]: https://app.datadoghq.com/incidents/settings?section=notification-rules [3]: https://app.datadoghq.com/incidents/settings?section=postmortem-templates [4]: https://app.datadoghq.com/incidents/settings?section=integrations +[5]: /service_management/incident_management/follow-ups diff --git a/static/images/bits_ai/coordinate_incidents_slack_settings.png b/static/images/service_management/incidents/incident_ai/coordinate_incidents_slack_settings.png similarity index 100% rename from static/images/bits_ai/coordinate_incidents_slack_settings.png rename to static/images/service_management/incidents/incident_ai/coordinate_incidents_slack_settings.png diff --git a/static/images/bits_ai/incident_summary.png b/static/images/service_management/incidents/incident_ai/incident_summary.png similarity index 100% rename from static/images/bits_ai/incident_summary.png rename to static/images/service_management/incidents/incident_ai/incident_summary.png diff --git a/static/images/bits_ai/message_template_variables.png b/static/images/service_management/incidents/incident_ai/message_template_variables.png similarity index 100% rename from static/images/bits_ai/message_template_variables.png rename to 
static/images/service_management/incidents/incident_ai/message_template_variables.png