From e66fd25bbbafe2075e401cc9dc90016f607e2317 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 21 Sep 2022 14:08:55 +0800 Subject: [PATCH 01/15] Introduce apm data type --- apmproxy/apmserver.go | 4 ++-- apmproxy/apmserver_test.go | 20 ++++++++++---------- apmproxy/client.go | 4 ++-- apmproxy/metadata.go | 2 +- apmproxy/metadata_test.go | 7 ++++--- apmproxy/option.go | 2 +- apmproxy/receiver.go | 19 ++++++++++++++++--- logsapi/functionlogs.go | 8 ++++---- logsapi/functionlogs_test.go | 1 + logsapi/metrics.go | 8 ++++---- logsapi/metrics_test.go | 11 +++++++---- 11 files changed, 52 insertions(+), 34 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 35fb4396..fa3d9ee5 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -93,7 +93,7 @@ func (c *Client) FlushAPMData(ctx context.Context) { // The function compresses the APM agent data, if it's not already compressed. // It sets the APM transport status to failing upon errors, as part of the backoff // strategy. -func (c *Client) PostToApmServer(ctx context.Context, agentData AgentData) error { +func (c *Client) PostToApmServer(ctx context.Context, agentData APMData) error { // todo: can this be a streaming or streaming style call that keeps the // connection open across invocations? if c.IsUnhealthy() { @@ -283,7 +283,7 @@ func (c *Client) ComputeGracePeriod() time.Duration { // EnqueueAPMData adds a AgentData struct to the agent data channel, effectively queueing for a send // to the APM server. -func (c *Client) EnqueueAPMData(agentData AgentData) { +func (c *Client) EnqueueAPMData(agentData APMData) { select { case c.DataChannel <- agentData: c.logger.Debug("Adding agent data to buffer to be sent to apm server") diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index 3f028ff0..eb34e010 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -57,7 +57,7 @@ func TestPostToApmServerDataCompressed(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -83,7 +83,7 @@ func TestPostToApmServerDataCompressed(t *testing.T) { func TestPostToApmServerDataNotCompressed(t *testing.T) { s := "A long time ago in a galaxy far, far away..." 
body := []byte(s) - agentData := apmproxy.AgentData{Data: body, ContentEncoding: ""} + agentData := apmproxy.APMData{Data: body, ContentEncoding: ""} // Compress the data, so it can be compared with what // the apm server receives @@ -251,7 +251,7 @@ func TestEnterBackoffFromHealthy(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -302,7 +302,7 @@ func TestEnterBackoffFromFailing(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -356,7 +356,7 @@ func TestAPMServerRecovery(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -409,7 +409,7 @@ func TestAPMServerAuthFails(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -453,7 +453,7 @@ func TestAPMServerRatelimit(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler var shouldSucceed atomic.Bool @@ -506,7 +506,7 @@ func TestAPMServerClientFail(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler var shouldSucceed atomic.Bool @@ -558,7 +558,7 @@ func TestContinuedAPMServerFailure(t *testing.T) { // Create AgentData struct with compressed data data, _ := io.ReadAll(pr) - agentData := apmproxy.AgentData{Data: data, ContentEncoding: "gzip"} + agentData := apmproxy.APMData{Data: data, ContentEncoding: "gzip"} // Create apm server and handler apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -615,7 +615,7 @@ func BenchmarkPostToAPM(b *testing.B) { {"transaction": { "id": "00xxxxFFaaaa1234", "trace_id": "0123456789abcdef0123456789abcdef", "name": "amqp receive", "parent_id": "abcdefabcdef01234567", "type": "messaging", "duration": 3, "span_count": { "started": 1 }, "context": {"message": {"queue": { "name": "new_users"}, "age":{ "ms": 1577958057123}, "headers": {"user_id": "1ax3", "involved_services": ["user", "auth"]}, "body": "user created", "routing_key": "user-created-transaction"}},"session":{"id":"sunday","sequence":123}}} {"transaction": { "name": 
"july-2021-delete-after-july-31", "type": "lambda", "result": "success", "id": "142e61450efb8574", "trace_id": "eb56529a1f461c5e7e2f66ecb075e983", "subtype": null, "action": null, "duration": 38.853, "timestamp": 1631736666365048, "sampled": true, "context": { "cloud": { "origin": { "account": { "id": "abc123" }, "provider": "aws", "region": "us-east-1", "service": { "name": "serviceName" } } }, "service": { "origin": { "id": "abc123", "name": "service-name", "version": "1.0" } }, "user": {}, "tags": {}, "custom": { } }, "sync": true, "span_count": { "started": 0 }, "outcome": "unknown", "faas": { "coldstart": false, "execution": "2e13b309-23e1-417f-8bf7-074fc96bc683", "trigger": { "request_id": "FuH2Cir_vHcEMUA=", "type": "http" } }, "sample_rate": 1 } } `) - agentData := apmproxy.AgentData{Data: benchBody, ContentEncoding: ""} + agentData := apmproxy.APMData{Data: benchBody, ContentEncoding: ""} b.ResetTimer() for i := 0; i < b.N; i++ { diff --git a/apmproxy/client.go b/apmproxy/client.go index 7b161abe..6899133f 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -52,7 +52,7 @@ const ( type Client struct { mu sync.RWMutex bufferPool sync.Pool - DataChannel chan AgentData + DataChannel chan APMData client *http.Client Status Status ReconnectionCount int @@ -72,7 +72,7 @@ func NewClient(opts ...Option) (*Client, error) { bufferPool: sync.Pool{New: func() interface{} { return &bytes.Buffer{} }}, - DataChannel: make(chan AgentData, defaultAgentBufferSize), + DataChannel: make(chan APMData, defaultAgentBufferSize), client: &http.Client{ Transport: http.DefaultTransport.(*http.Transport).Clone(), }, diff --git a/apmproxy/metadata.go b/apmproxy/metadata.go index d0becec0..2f10a2ae 100644 --- a/apmproxy/metadata.go +++ b/apmproxy/metadata.go @@ -31,7 +31,7 @@ type MetadataContainer struct { // ProcessMetadata return a byte array containing the Metadata marshaled in JSON // In case we want to update the Metadata values, usage of https://github.com/tidwall/sjson is advised -func ProcessMetadata(data AgentData) ([]byte, error) { +func ProcessMetadata(data APMData) ([]byte, error) { uncompressedData, err := GetUncompressedBytes(data.Data, data.ContentEncoding) if err != nil { return nil, fmt.Errorf("error uncompressing agent data for metadata extraction: %w", err) diff --git a/apmproxy/metadata_test.go b/apmproxy/metadata_test.go index be88a0f3..0bc3b3c3 100644 --- a/apmproxy/metadata_test.go +++ b/apmproxy/metadata_test.go @@ -21,10 +21,11 @@ import ( "bytes" "compress/gzip" "compress/zlib" - "github.com/elastic/apm-aws-lambda/apmproxy" "io" "testing" + "github.com/elastic/apm-aws-lambda/apmproxy" + "github.com/stretchr/testify/require" ) @@ -133,7 +134,7 @@ func Test_processMetadata(t *testing.T) { for name, tc := range testCases { t.Run(name, func(t *testing.T) { - agentData := apmproxy.AgentData{Data: tc.data(), ContentEncoding: tc.encodingType} + agentData := apmproxy.APMData{Data: tc.data(), ContentEncoding: tc.encodingType} extractedMetadata, err := apmproxy.ProcessMetadata(agentData) if tc.expectError != nil { @@ -167,7 +168,7 @@ func BenchmarkProcessMetadata(b *testing.B) { } for _, bench := range benchmarks { - agentData := apmproxy.AgentData{Data: bench.body, ContentEncoding: ""} + agentData := apmproxy.APMData{Data: bench.body, ContentEncoding: ""} b.Run(bench.name, func(b *testing.B) { b.ReportAllocs() diff --git a/apmproxy/option.go b/apmproxy/option.go index 60b353f5..827ed218 100644 --- a/apmproxy/option.go +++ b/apmproxy/option.go @@ -74,7 +74,7 @@ func 
WithSendStrategy(strategy SendStrategy) Option { // WithAgentDataBufferSize sets the agent data buffer size. func WithAgentDataBufferSize(size int) Option { return func(c *Client) { - c.DataChannel = make(chan AgentData, size) + c.DataChannel = make(chan APMData, size) } } diff --git a/apmproxy/receiver.go b/apmproxy/receiver.go index aca911f4..b385c9b1 100644 --- a/apmproxy/receiver.go +++ b/apmproxy/receiver.go @@ -29,12 +29,24 @@ import ( "time" ) -type AgentData struct { +// APMDataType represents source of APMData +type APMDataType string + +const ( + // Agent data type represents APMData collected from APM agents + Agent APMDataType = "agent" + // Lambda data type represents APMData collected from logs API + Lambda APMDataType = "lambda" +) + +// APMData represents data to be sent to APMServer +type APMData struct { Data []byte + Type APMDataType ContentEncoding string } -// StartHttpServer starts the server listening for APM agent data. +// StartReceiver starts the server listening for APM agent data. func (c *Client) StartReceiver() error { mux := http.NewServeMux() @@ -123,8 +135,9 @@ func (c *Client) handleIntakeV2Events() func(w http.ResponseWriter, r *http.Requ agentFlushed := r.URL.Query().Get("flushed") == "true" - agentData := AgentData{ + agentData := APMData{ Data: rawBytes, + Type: Agent, ContentEncoding: r.Header.Get("Content-Encoding"), } diff --git a/logsapi/functionlogs.go b/logsapi/functionlogs.go index 77298890..f5e2f594 100644 --- a/logsapi/functionlogs.go +++ b/logsapi/functionlogs.go @@ -92,9 +92,9 @@ func ProcessFunctionLog( requestID string, invokedFnArn string, log LogEvent, -) (apmproxy.AgentData, error) { +) (apmproxy.APMData, error) { if metadataContainer == nil || len(metadataContainer.Metadata) == 0 { - return apmproxy.AgentData{}, errors.New("metadata is required") + return apmproxy.APMData{}, errors.New("metadata is required") } lc := logContainer{ @@ -111,7 +111,7 @@ func ProcessFunctionLog( var jsonWriter fastjson.Writer if err := lc.MarshalFastJSON(&jsonWriter); err != nil { - return apmproxy.AgentData{}, err + return apmproxy.APMData{}, err } capacity := len(metadataContainer.Metadata) + jsonWriter.Size() + 1 @@ -120,5 +120,5 @@ func ProcessFunctionLog( logData = append(logData, '\n') logData = append(logData, jsonWriter.Bytes()...) 
- return apmproxy.AgentData{Data: logData}, nil + return apmproxy.APMData{Type: apmproxy.Lambda, Data: logData}, nil } diff --git a/logsapi/functionlogs_test.go b/logsapi/functionlogs_test.go index 5aa4034b..e8658a9e 100644 --- a/logsapi/functionlogs_test.go +++ b/logsapi/functionlogs_test.go @@ -50,5 +50,6 @@ func TestProcessFunctionLog(t *testing.T) { apmData, err := ProcessFunctionLog(metadataContainer, reqID, invokedFnArn, event) require.NoError(t, err) + assert.Equal(t, apmproxy.Lambda, apmData.Type) assert.Equal(t, expectedData, string(apmData.Data)) } diff --git a/logsapi/metrics.go b/logsapi/metrics.go index 6108b22c..6aeac9d6 100644 --- a/logsapi/metrics.go +++ b/logsapi/metrics.go @@ -63,10 +63,10 @@ func (mc MetricsContainer) MarshalFastJSON(json *fastjson.Writer) error { return nil } -func ProcessPlatformReport(metadataContainer *apmproxy.MetadataContainer, functionData *extension.NextEventResponse, platformReport LogEvent) (apmproxy.AgentData, error) { +func ProcessPlatformReport(metadataContainer *apmproxy.MetadataContainer, functionData *extension.NextEventResponse, platformReport LogEvent) (apmproxy.APMData, error) { if metadataContainer == nil || len(metadataContainer.Metadata) == 0 { - return apmproxy.AgentData{}, errors.New("metadata is not populated") + return apmproxy.APMData{}, errors.New("metadata is not populated") } metricsContainer := MetricsContainer{ @@ -103,7 +103,7 @@ func ProcessPlatformReport(metadataContainer *apmproxy.MetadataContainer, functi var jsonWriter fastjson.Writer if err := metricsContainer.MarshalFastJSON(&jsonWriter); err != nil { - return apmproxy.AgentData{}, err + return apmproxy.APMData{}, err } capacity := len(metadataContainer.Metadata) + jsonWriter.Size() + 1 // 1 for newline @@ -112,5 +112,5 @@ func ProcessPlatformReport(metadataContainer *apmproxy.MetadataContainer, functi metricsData = append(metricsData, []byte("\n")...) metricsData = append(metricsData, jsonWriter.Bytes()...) 
- return apmproxy.AgentData{Data: metricsData}, nil + return apmproxy.APMData{Type: apmproxy.Lambda, Data: metricsData}, nil } diff --git a/logsapi/metrics_test.go b/logsapi/metrics_test.go index 7ca086a5..8de5ccfd 100644 --- a/logsapi/metrics_test.go +++ b/logsapi/metrics_test.go @@ -75,10 +75,11 @@ func TestProcessPlatformReport_Coldstart(t *testing.T) { desiredOutputMetrics := fmt.Sprintf(`{"metricset":{"samples":{"faas.coldstart_duration":{"value":422.9700012207031},"faas.timeout":{"value":5000},"system.memory.total":{"value":1.34217728e+08},"system.memory.actual.free":{"value":5.4525952e+07},"faas.duration":{"value":182.42999267578125},"faas.billed_duration":{"value":183}},"timestamp":%d,"faas":{"coldstart":true,"execution":"6f7f0961f83442118a7af6fe80b88d56","id":"arn:aws:lambda:us-east-2:123456789012:function:custom-runtime"}}}`, timestamp.UnixNano()/1e3) - rawBytes, err := ProcessPlatformReport(&mc, &event, logEvent) + apmData, err := ProcessPlatformReport(&mc, &event, logEvent) require.NoError(t, err) + assert.Equal(t, apmproxy.Lambda, apmData.Type) - requestBytes, err := apmproxy.GetUncompressedBytes(rawBytes.Data, "") + requestBytes, err := apmproxy.GetUncompressedBytes(apmData.Data, "") require.NoError(t, err) out := string(requestBytes) @@ -135,10 +136,11 @@ func TestProcessPlatformReport_NoColdstart(t *testing.T) { desiredOutputMetrics := fmt.Sprintf(`{"metricset":{"samples":{"faas.coldstart_duration":{"value":0},"faas.timeout":{"value":5000},"system.memory.total":{"value":1.34217728e+08},"system.memory.actual.free":{"value":5.4525952e+07},"faas.duration":{"value":182.42999267578125},"faas.billed_duration":{"value":183}},"timestamp":%d,"faas":{"coldstart":false,"execution":"6f7f0961f83442118a7af6fe80b88d56","id":"arn:aws:lambda:us-east-2:123456789012:function:custom-runtime"}}}`, timestamp.UnixNano()/1e3) - rawBytes, err := ProcessPlatformReport(&mc, &event, logEvent) + apmData, err := ProcessPlatformReport(&mc, &event, logEvent) require.NoError(t, err) + assert.Equal(t, apmproxy.Lambda, apmData.Type) - requestBytes, err := apmproxy.GetUncompressedBytes(rawBytes.Data, "") + requestBytes, err := apmproxy.GetUncompressedBytes(apmData.Data, "") require.NoError(t, err) out := string(requestBytes) @@ -183,6 +185,7 @@ func TestProcessPlatformReport_DataCorruption(t *testing.T) { agentData, err := ProcessPlatformReport(mc, nextEventResp, logEvent) require.NoError(t, err) + assert.Equal(t, apmproxy.Lambda, agentData.Type) // process another platform report to ensure the previous payload is not corrupted logEvent.Record.RequestID = "corrupt-req-id" From 5346d96c8d54a30b3c9b41d4ee342beedda109bd Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 22 Sep 2022 15:46:25 +0800 Subject: [PATCH 02/15] Batch lambda logs API data before sending to APM-Server --- apmproxy/apmserver.go | 92 +++++++++++++++++++++++----- apmproxy/apmserver_test.go | 114 +++++++++++++++++++++++++++++++++++ apmproxy/batch.go | 70 +++++++++++++++++++++ apmproxy/batch_test.go | 61 +++++++++++++++++++ apmproxy/client.go | 3 + apmproxy/metadata.go | 4 -- apmproxy/receiver.go | 4 +- app/run.go | 10 +-- logsapi/event.go | 6 +- logsapi/functionlogs.go | 18 ++---- logsapi/functionlogs_test.go | 8 +-- logsapi/metrics.go | 19 ++---- logsapi/metrics_test.go | 81 ++----------------------- 13 files changed, 347 insertions(+), 143 deletions(-) create mode 100644 apmproxy/batch.go create mode 100644 apmproxy/batch_test.go diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index fa3d9ee5..0d9dcdca 100644 --- 
a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -31,6 +31,8 @@ import ( "time" ) +const defaultBatchSize = 20 + type jsonResult struct { Errors []jsonError `json:"errors,omitempty"` } @@ -40,27 +42,21 @@ type jsonError struct { Document string `json:"document,omitempty"` } -// ForwardApmData receives agent data as it comes in and posts it to the APM server. -// Stop checking for, and sending agent data when the function invocation +// ForwardApmData receives apm data as it comes in and posts it to the APM server. +// Stop checking for, and sending apm data when the function invocation // has completed, signaled via a channel. -func (c *Client) ForwardApmData(ctx context.Context, metadataContainer *MetadataContainer) error { +func (c *Client) ForwardApmData(ctx context.Context) error { if c.IsUnhealthy() { return nil } + batch := NewBatch(defaultBatchSize) for { select { case <-ctx.Done(): c.logger.Debug("Invocation context cancelled, not processing any more agent data") - return nil - case agentData := <-c.DataChannel: - if metadataContainer.Metadata == nil { - metadata, err := ProcessMetadata(agentData) - if err != nil { - return fmt.Errorf("failed to extract metadata from agent payload %w", err) - } - metadataContainer.Metadata = metadata - } - if err := c.PostToApmServer(ctx, agentData); err != nil { + return c.sendBatch(ctx, batch) + case apmData := <-c.DataChannel: + if err := c.forwardAPMDataByType(ctx, apmData, batch); err != nil { return fmt.Errorf("error sending to APM server, skipping: %v", err) } } @@ -74,15 +70,20 @@ func (c *Client) FlushAPMData(ctx context.Context) { return } c.logger.Debug("Flush started - Checking for agent data") + batch := NewBatch(defaultBatchSize) for { select { - case agentData := <-c.DataChannel: + case apmData := <-c.DataChannel: c.logger.Debug("Flush in progress - Processing agent data") - if err := c.PostToApmServer(ctx, agentData); err != nil { + if err := c.forwardAPMDataByType(ctx, apmData, batch); err != nil { c.logger.Errorf("Error sending to APM server, skipping: %v", err) } default: c.logger.Debug("Flush ended - No agent data on buffer") + // Flush any remaining data in batch + if err := c.sendBatch(ctx, batch); err != nil { + c.logger.Errorf("Error sending to APM server, skipping: %v", err) + } return } } @@ -313,3 +314,64 @@ func (c *Client) WaitForFlush() <-chan struct{} { defer c.flushMutex.Unlock() return c.flushCh } + +func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData, batch *BatchData) error { + switch apmData.Type { + case Agent: + if len(c.metadata) == 0 { + metadata, err := ProcessMetadata(apmData) + if err != nil { + return fmt.Errorf("failed to extract metadata from agent payload %w", err) + } + c.setMetadata(metadata) + } + return c.PostToApmServer(ctx, apmData) + case Lambda: + if err := batch.Add(apmData); err != nil { + c.logger.Warnf("Dropping data due to error: %v", err) + } + if batch.ShouldFlush() { + return c.sendBatch(ctx, batch) + } + return nil + default: + return errors.New("invalid apm data type") + } +} + +func (c *Client) sendBatch(ctx context.Context, batch *BatchData) error { + // TODO: @lahsivjar keep buffering logs in DataChannel until metadata + // is available. 
+ if len(c.metadata) == 0 { + return errors.New("metadata not yet populated, dropping data") + } + if batch.Size() == 0 { + return nil + } + defer batch.Reset() + return c.PostToApmServer(ctx, c.getWithMetadata(batch)) +} + +func (c *Client) setMetadata(metadata []byte) { + c.metadataMutex.Lock() + defer c.metadataMutex.Unlock() + c.metadata = metadata +} + +func (c *Client) getWithMetadata(batch *BatchData) APMData { + c.metadataMutex.RLock() + defer c.metadataMutex.RUnlock() + + capacity := len(c.metadata) + 1 + for _, d := range batch.agentData { + capacity += len(d.Data) + 1 + } + + withMeta := make([]byte, len(c.metadata), capacity) + copy(withMeta, c.metadata) + for _, d := range batch.agentData { + withMeta = append(withMeta, '\n') + withMeta = append(withMeta, d.Data...) + } + return APMData{Data: withMeta} +} diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index eb34e010..2e313161 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -18,11 +18,14 @@ package apmproxy_test import ( + "bytes" "compress/gzip" "context" + "fmt" "io" "net/http" "net/http/httptest" + "sync" "sync/atomic" "testing" "time" @@ -586,6 +589,115 @@ func TestContinuedAPMServerFailure(t *testing.T) { assert.Equal(t, apmClient.Status, apmproxy.Failing) } +func TestForwardApmData(t *testing.T) { + receivedReqBodyChan := make(chan []byte) + apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + bytes, _ := io.ReadAll(r.Body) + receivedReqBodyChan <- bytes + w.WriteHeader(http.StatusAccepted) + })) + t.Cleanup(apmServer.Close) + metadata := `{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"1.1.0"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}` + assertGzipBody := func(expected string) { + buf := bytes.NewReader(<-receivedReqBodyChan) + r, err := gzip.NewReader(buf) + require.NoError(t, err) + out, err := io.ReadAll(r) + require.NoError(t, err) + assert.Equal(t, expected, string(out)) + } + agentData := fmt.Sprintf("%s\n%s", metadata, `{"log": {"message": "test"}}`) + lambdaData := `{"log": {"message": "test"}}` + apmClient, err := apmproxy.NewClient( + apmproxy.WithURL(apmServer.URL), + apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + ) + require.NoError(t, err) + // Override DataChannel to be unbuffered for ease of testing + apmClient.DataChannel = make(chan apmproxy.APMData) + + // Start forwarding APM data + ctx, cancel := context.WithCancel(context.Background()) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + assert.NoError(t, apmClient.ForwardApmData(ctx)) + }() + + // Populate metadata by sending agent data + apmClient.DataChannel <- apmproxy.APMData{ + Data: []byte(agentData), + Type: apmproxy.Agent, + } + assertGzipBody(agentData) + + // Send lambda logs API data + var expected bytes.Buffer + expected.WriteString(metadata) + // Send multiple lambda logs to batch data + for i := 0; i < 5; i++ { + apmClient.DataChannel <- apmproxy.APMData{ + Data: []byte(lambdaData), + Type: apmproxy.Lambda, + } + expected.WriteByte('\n') + expected.WriteString(lambdaData) + } + + // Trigger a batch send by cancelling context + cancel() + assertGzipBody(expected.String()) + // Wait for ForwardApmData to exit + 
wg.Wait() +} + +func BenchmarkFlushAPMData(b *testing.B) { + // Create apm server and handler + apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if _, err := io.Copy(io.Discard, r.Body); err != nil { + return + } + if err := r.Body.Close(); err != nil { + return + } + w.WriteHeader(202) + if _, err := w.Write([]byte(`{}`)); err != nil { + return + } + })) + b.Cleanup(apmServer.Close) + + apmClient, err := apmproxy.NewClient( + apmproxy.WithURL(apmServer.URL), + apmproxy.WithLogger(zaptest.NewLogger(b).Sugar()), + ) + require.NoError(b, err) + + // Copied from https://github.com/elastic/apm-server/blob/master/testdata/intake-v2/transactions.ndjson. + agentData := []byte(`{"metadata": {"service": {"name": "1234_service-12a3","node": {"configured_name": "node-123"},"version": "5.1.3","environment": "staging","language": {"name": "ecmascript","version": "8"},"runtime": {"name": "node","version": "8.0.0"},"framework": {"name": "Express","version": "1.2.3"},"agent": {"name": "elastic-node","version": "3.14.0"}},"user": {"id": "123user", "username": "bar", "email": "bar@user.com"}, "labels": {"tag0": null, "tag1": "one", "tag2": 2}, "process": {"pid": 1234,"ppid": 6789,"title": "node","argv": ["node","server.js"]},"system": {"hostname": "prod1.example.com","architecture": "x64","platform": "darwin", "container": {"id": "container-id"}, "kubernetes": {"namespace": "namespace1", "pod": {"uid": "pod-uid", "name": "pod-name"}, "node": {"name": "node-name"}}},"cloud":{"account":{"id":"account_id","name":"account_name"},"availability_zone":"cloud_availability_zone","instance":{"id":"instance_id","name":"instance_name"},"machine":{"type":"machine_type"},"project":{"id":"project_id","name":"project_name"},"provider":"cloud_provider","region":"cloud_region","service":{"name":"lambda"}}}} +{"transaction": { "id": "945254c567a5417e", "trace_id": "0123456789abcdef0123456789abcdef", "parent_id": "abcdefabcdef01234567", "type": "request", "duration": 32.592981, "span_count": { "started": 43 }}} +{"transaction": {"id": "4340a8e0df1906ecbfa9", "trace_id": "0acd456789abcdef0123456789abcdef", "name": "GET /api/types","type": "request","duration": 32.592981,"outcome":"success", "result": "success", "timestamp": 1496170407154000, "sampled": true, "span_count": {"started": 17},"context": {"service": {"runtime": {"version": "7.0"}},"page":{"referer":"http://localhost:8000/test/e2e/","url":"http://localhost:8000/test/e2e/general-usecase/"}, "request": {"socket": {"remote_address": "12.53.12.1","encrypted": true},"http_version": "1.1","method": "POST","url": {"protocol": "https:","full": "https://www.example.com/p/a/t/h?query=string#hash","hostname": "www.example.com","port": "8080","pathname": "/p/a/t/h","search": "?query=string","hash": "#hash","raw": "/p/a/t/h?query=string#hash"},"headers": {"user-agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36","Mozilla Chrome Edge"],"content-type": "text/html","cookie": "c1=v1, c2=v2","some-other-header": "foo","array": ["foo","bar","baz"]},"cookies": {"c1": "v1","c2": "v2"},"env": {"SERVER_SOFTWARE": "nginx","GATEWAY_INTERFACE": "CGI/1.1"},"body": {"str": "hello world","additional": { "foo": {},"bar": 123,"req": "additional information"}}},"response": {"status_code": 200,"headers": {"content-type": "application/json"},"headers_sent": true,"finished": true,"transfer_size":25.8,"encoded_body_size":26.90,"decoded_body_size":29.90}, "user": {"domain": 
"ldap://abc","id": "99","username": "foo"},"tags": {"organization_uuid": "9f0e9d64-c185-4d21-a6f4-4673ed561ec8", "tag2": 12, "tag3": 12.45, "tag4": false, "tag5": null },"custom": {"my_key": 1,"some_other_value": "foo bar","and_objects": {"foo": ["bar","baz"]},"(": "not a valid regex and that is fine"}}}} +{"transaction": { "id": "cdef4340a8e0df19", "trace_id": "0acd456789abcdef0123456789abcdef", "type": "request", "duration": 13.980558, "timestamp": 1532976822281000, "sampled": null, "span_count": { "dropped": 55, "started": 436 }, "marks": {"navigationTiming": {"appBeforeBootstrap": 608.9300000000001,"navigationStart": -21},"another_mark": {"some_long": 10,"some_float": 10.0}, "performance": {}}, "context": { "request": { "socket": { "remote_address": "192.0.1", "encrypted": null }, "method": "POST", "headers": { "user-agent": null, "content-type": null, "cookie": null }, "url": { "protocol": null, "full": null, "hostname": null, "port": null, "pathname": null, "search": null, "hash": null, "raw": null } }, "response": { "headers": { "content-type": null } }, "service": {"environment":"testing","name": "service1","node": {"configured_name": "node-ABC"}, "language": {"version": "2.5", "name": "ruby"}, "agent": {"version": "2.2", "name": "elastic-ruby", "ephemeral_id": "justanid"}, "framework": {"version": "5.0", "name": "Rails"}, "version": "2", "runtime": {"version": "2.5", "name": "cruby"}}},"experience":{"cls":1,"fid":2.0,"tbt":3.4,"longtask":{"count":3,"sum":2.5,"max":1}}}} +{"transaction": { "id": "00xxxxFFaaaa1234", "trace_id": "0123456789abcdef0123456789abcdef", "name": "amqp receive", "parent_id": "abcdefabcdef01234567", "type": "messaging", "duration": 3, "span_count": { "started": 1 }, "context": {"message": {"queue": { "name": "new_users"}, "age":{ "ms": 1577958057123}, "headers": {"user_id": "1ax3", "involved_services": ["user", "auth"]}, "body": "user created", "routing_key": "user-created-transaction"}},"session":{"id":"sunday","sequence":123}}} +{"transaction": { "name": "july-2021-delete-after-july-31", "type": "lambda", "result": "success", "id": "142e61450efb8574", "trace_id": "eb56529a1f461c5e7e2f66ecb075e983", "subtype": null, "action": null, "duration": 38.853, "timestamp": 1631736666365048, "sampled": true, "context": { "cloud": { "origin": { "account": { "id": "abc123" }, "provider": "aws", "region": "us-east-1", "service": { "name": "serviceName" } } }, "service": { "origin": { "id": "abc123", "name": "service-name", "version": "1.0" } }, "user": {}, "tags": {}, "custom": { } }, "sync": true, "span_count": { "started": 0 }, "outcome": "unknown", "faas": { "coldstart": false, "execution": "2e13b309-23e1-417f-8bf7-074fc96bc683", "trigger": { "request_id": "FuH2Cir_vHcEMUA=", "type": "http" } }, "sample_rate": 1 } } +`) + agentAPMData := apmproxy.APMData{Data: agentData, Type: apmproxy.Agent} + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + apmClient.DataChannel <- agentAPMData + for j := 0; j < 99; j++ { + apmClient.DataChannel <- apmproxy.APMData{ + Data: []byte("this is test log"), + Type: apmproxy.Lambda, + } + } + apmClient.FlushAPMData(context.Background()) + } +} + func BenchmarkPostToAPM(b *testing.B) { // Create apm server and handler apmServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -600,6 +712,7 @@ func BenchmarkPostToAPM(b *testing.B) { return } })) + b.Cleanup(apmServer.Close) apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), @@ -617,6 +730,7 @@ func 
BenchmarkPostToAPM(b *testing.B) {
 `)
 	agentData := apmproxy.APMData{Data: benchBody, ContentEncoding: ""}
 
+	b.ReportAllocs()
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		if err := apmClient.PostToApmServer(context.Background(), agentData); err != nil {
diff --git a/apmproxy/batch.go b/apmproxy/batch.go
new file mode 100644
index 00000000..a56ad926
--- /dev/null
+++ b/apmproxy/batch.go
@@ -0,0 +1,70 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package apmproxy
+
+import "errors"
+
+// ErrBatchFull signifies that the batch has reached full
+// capacity and cannot accept more entries.
+var ErrBatchFull = errors.New("batch is full")
+
+// BatchData represents a batch of data without metadata
+// that will be sent to APMServer. BatchData is not safe
+// for concurrent access.
+type BatchData struct {
+	agentData []APMData
+	maxSize int
+}
+
+// NewBatch creates a new BatchData which can accept a
+// maximum number of entries as specified by the argument.
+func NewBatch(maxSize int) *BatchData {
+	return &BatchData{
+		maxSize: maxSize,
+		agentData: make([]APMData, 0, maxSize),
+	}
+}
+
+// Add adds a new entry to the batch. Returns ErrBatchFull
+// if the batch has reached its maximum size.
+func (b *BatchData) Add(d APMData) error {
+	if len(b.agentData) >= b.maxSize {
+		return ErrBatchFull
+	}
+
+	b.agentData = append(b.agentData, d)
+	return nil
+}
+
+// Size returns the number of entries in the batch.
+func (b *BatchData) Size() int {
+	return len(b.agentData)
+}
+
+// ShouldFlush indicates when a batch is ready for flush.
+// A batch is marked as ready for flush once it reaches
+// 90% of its max size.
+func (b *BatchData) ShouldFlush() bool {
+	return len(b.agentData) >= int(float32(b.maxSize)*0.9)
+}
+
+// Reset resets the batch to prepare for a new set of data.
+func (b *BatchData) Reset() {
+	b.agentData = nil
+	b.agentData = make([]APMData, 0, b.maxSize)
+}
diff --git a/apmproxy/batch_test.go b/apmproxy/batch_test.go
new file mode 100644
index 00000000..f71b7de9
--- /dev/null
+++ b/apmproxy/batch_test.go
@@ -0,0 +1,61 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +package apmproxy + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAdd(t *testing.T) { + t.Run("empty", func(t *testing.T) { + b := NewBatch(1) + + assert.NoError(t, b.Add(APMData{})) + }) + t.Run("full", func(t *testing.T) { + b := NewBatch(1) + require.NoError(t, b.Add(APMData{})) + + assert.ErrorIs(t, ErrBatchFull, b.Add(APMData{})) + }) +} + +func TestReset(t *testing.T) { + b := NewBatch(1) + require.NoError(t, b.Add(APMData{})) + require.Equal(t, 1, b.Size()) + b.Reset() + + assert.Equal(t, 0, b.Size()) +} + +func TestShouldFlush(t *testing.T) { + b := NewBatch(10) + + // Should flush at 90% full + for i := 0; i < 9; i++ { + assert.False(t, b.ShouldFlush()) + require.NoError(t, b.Add(APMData{})) + } + + require.Equal(t, 9, b.Size()) + assert.True(t, b.ShouldFlush()) +} diff --git a/apmproxy/client.go b/apmproxy/client.go index 6899133f..1269b2c9 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -65,6 +65,9 @@ type Client struct { flushMutex sync.Mutex flushCh chan struct{} + + metadataMutex sync.RWMutex + metadata []byte } func NewClient(opts ...Option) (*Client, error) { diff --git a/apmproxy/metadata.go b/apmproxy/metadata.go index 2f10a2ae..6d63b292 100644 --- a/apmproxy/metadata.go +++ b/apmproxy/metadata.go @@ -25,10 +25,6 @@ import ( "io" ) -type MetadataContainer struct { - Metadata []byte -} - // ProcessMetadata return a byte array containing the Metadata marshaled in JSON // In case we want to update the Metadata values, usage of https://github.com/tidwall/sjson is advised func ProcessMetadata(data APMData) ([]byte, error) { diff --git a/apmproxy/receiver.go b/apmproxy/receiver.go index b385c9b1..74843ad8 100644 --- a/apmproxy/receiver.go +++ b/apmproxy/receiver.go @@ -39,7 +39,9 @@ const ( Lambda APMDataType = "lambda" ) -// APMData represents data to be sent to APMServer +// APMData represents data to be sent to APMServer. `Agent` type +// data will have `metadata` as ndjson whereas `lambda` type data +// will be without metadata. type APMData struct { Data []byte Type APMDataType diff --git a/app/run.go b/app/run.go index 8a9a1438..f4035660 100644 --- a/app/run.go +++ b/app/run.go @@ -23,7 +23,6 @@ import ( "sync" "time" - "github.com/elastic/apm-aws-lambda/apmproxy" "github.com/elastic/apm-aws-lambda/extension" ) @@ -82,9 +81,6 @@ func (app *App) Run(ctx context.Context) error { // The previous event id is used to validate the received Lambda metrics var prevEvent *extension.NextEventResponse - // This data structure contains metadata tied to the current Lambda instance. If empty, it is populated once for each - // active Lambda environment - metadataContainer := apmproxy.MetadataContainer{} for { select { @@ -96,7 +92,7 @@ func (app *App) Run(ctx context.Context) error { // Use a wait group to ensure the background go routine sending to the APM server // completes before signaling that the extension is ready for the next invocation. 
var backgroundDataSendWg sync.WaitGroup - event, err := app.processEvent(ctx, &backgroundDataSendWg, prevEvent, &metadataContainer) + event, err := app.processEvent(ctx, &backgroundDataSendWg, prevEvent) if err != nil { return err } @@ -120,7 +116,6 @@ func (app *App) processEvent( ctx context.Context, backgroundDataSendWg *sync.WaitGroup, prevEvent *extension.NextEventResponse, - metadataContainer *apmproxy.MetadataContainer, ) (*extension.NextEventResponse, error) { // Reset flush state for future events. defer app.apmClient.ResetFlush() @@ -159,7 +154,7 @@ func (app *App) processEvent( backgroundDataSendWg.Add(1) go func() { defer backgroundDataSendWg.Done() - if err := app.apmClient.ForwardApmData(invocationCtx, metadataContainer); err != nil { + if err := app.apmClient.ForwardApmData(invocationCtx); err != nil { app.logger.Error(err) } }() @@ -174,7 +169,6 @@ func (app *App) processEvent( event.RequestID, event.InvokedFunctionArn, app.apmClient, - metadataContainer, runtimeDone, prevEvent, ); err != nil { diff --git a/logsapi/event.go b/logsapi/event.go index 0cefa6be..0a23e316 100644 --- a/logsapi/event.go +++ b/logsapi/event.go @@ -60,7 +60,6 @@ func (lc *Client) ProcessLogs( requestID string, invokedFnArn string, apmClient *apmproxy.Client, - metadataContainer *apmproxy.MetadataContainer, runtimeDoneSignal chan struct{}, prevEvent *extension.NextEventResponse, ) error { @@ -89,7 +88,7 @@ func (lc *Client) ProcessLogs( case PlatformReport: if prevEvent != nil && logEvent.Record.RequestID == prevEvent.RequestID { lc.logger.Debug("Received platform report for the previous function invocation") - processedMetrics, err := ProcessPlatformReport(metadataContainer, prevEvent, logEvent) + processedMetrics, err := ProcessPlatformReport(prevEvent, logEvent) if err != nil { lc.logger.Errorf("Error processing Lambda platform metrics : %v", err) } else { @@ -100,11 +99,8 @@ func (lc *Client) ProcessLogs( lc.logger.Debug("Log API runtimeDone event request id didn't match") } case FunctionLog: - // TODO: @lahsivjar Buffer logs and send batches of data to APM-Server. - // Buffering should account for metadata being available before sending. lc.logger.Debug("Received function log") processedLog, err := ProcessFunctionLog( - metadataContainer, platformStartReqID, invokedFnArn, logEvent, diff --git a/logsapi/functionlogs.go b/logsapi/functionlogs.go index f5e2f594..d32219fd 100644 --- a/logsapi/functionlogs.go +++ b/logsapi/functionlogs.go @@ -18,8 +18,6 @@ package logsapi import ( - "errors" - "github.com/elastic/apm-aws-lambda/apmproxy" "go.elastic.co/apm/v2/model" "go.elastic.co/fastjson" @@ -88,15 +86,10 @@ func (lc logContainer) MarshalFastJSON(json *fastjson.Writer) error { // ProcessFunctionLog consumes agent metadata and log event from Lambda // logs API to create a payload for APM server. func ProcessFunctionLog( - metadataContainer *apmproxy.MetadataContainer, requestID string, invokedFnArn string, log LogEvent, ) (apmproxy.APMData, error) { - if metadataContainer == nil || len(metadataContainer.Metadata) == 0 { - return apmproxy.APMData{}, errors.New("metadata is required") - } - lc := logContainer{ Log: &logLine{ Timestamp: model.Time(log.Time), @@ -114,11 +107,8 @@ func ProcessFunctionLog( return apmproxy.APMData{}, err } - capacity := len(metadataContainer.Metadata) + jsonWriter.Size() + 1 - logData := make([]byte, len(metadataContainer.Metadata), capacity) - copy(logData, metadataContainer.Metadata) - - logData = append(logData, '\n') - logData = append(logData, jsonWriter.Bytes()...) 
- return apmproxy.APMData{Type: apmproxy.Lambda, Data: logData}, nil + return apmproxy.APMData{ + Type: apmproxy.Lambda, + Data: jsonWriter.Bytes(), + }, nil } diff --git a/logsapi/functionlogs_test.go b/logsapi/functionlogs_test.go index e8658a9e..7952bf04 100644 --- a/logsapi/functionlogs_test.go +++ b/logsapi/functionlogs_test.go @@ -28,9 +28,6 @@ import ( ) func TestProcessFunctionLog(t *testing.T) { - metadataContainer := &apmproxy.MetadataContainer{ - Metadata: []byte(`{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"1.1.0"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}`), - } event := LogEvent{ Time: time.Date(2022, 11, 12, 0, 0, 0, 0, time.UTC), Type: FunctionLog, @@ -39,15 +36,14 @@ func TestProcessFunctionLog(t *testing.T) { reqID := "8476a536-e9f4-11e8-9739-2dfe598c3fcd" invokedFnArn := "arn:aws:lambda:us-east-2:123456789012:function:custom-runtime" expectedData := fmt.Sprintf( - "%s\n{\"log\":{\"message\":\"%s\",\"@timestamp\":%d,\"faas\":{\"id\":\"%s\",\"execution\":\"%s\"}}}", - metadataContainer.Metadata, + "{\"log\":{\"message\":\"%s\",\"@timestamp\":%d,\"faas\":{\"id\":\"%s\",\"execution\":\"%s\"}}}", event.StringRecord, event.Time.UnixNano()/int64(time.Microsecond), invokedFnArn, reqID, ) - apmData, err := ProcessFunctionLog(metadataContainer, reqID, invokedFnArn, event) + apmData, err := ProcessFunctionLog(reqID, invokedFnArn, event) require.NoError(t, err) assert.Equal(t, apmproxy.Lambda, apmData.Type) diff --git a/logsapi/metrics.go b/logsapi/metrics.go index 6aeac9d6..dc670047 100644 --- a/logsapi/metrics.go +++ b/logsapi/metrics.go @@ -18,7 +18,6 @@ package logsapi import ( - "errors" "math" "github.com/elastic/apm-aws-lambda/apmproxy" @@ -63,12 +62,7 @@ func (mc MetricsContainer) MarshalFastJSON(json *fastjson.Writer) error { return nil } -func ProcessPlatformReport(metadataContainer *apmproxy.MetadataContainer, functionData *extension.NextEventResponse, platformReport LogEvent) (apmproxy.APMData, error) { - - if metadataContainer == nil || len(metadataContainer.Metadata) == 0 { - return apmproxy.APMData{}, errors.New("metadata is not populated") - } - +func ProcessPlatformReport(functionData *extension.NextEventResponse, platformReport LogEvent) (apmproxy.APMData, error) { metricsContainer := MetricsContainer{ Metrics: &model.Metrics{}, } @@ -106,11 +100,8 @@ func ProcessPlatformReport(metadataContainer *apmproxy.MetadataContainer, functi return apmproxy.APMData{}, err } - capacity := len(metadataContainer.Metadata) + jsonWriter.Size() + 1 // 1 for newline - metricsData := make([]byte, len(metadataContainer.Metadata), capacity) - copy(metricsData, metadataContainer.Metadata) - - metricsData = append(metricsData, []byte("\n")...) - metricsData = append(metricsData, jsonWriter.Bytes()...) 
- return apmproxy.APMData{Type: apmproxy.Lambda, Data: metricsData}, nil + return apmproxy.APMData{ + Type: apmproxy.Lambda, + Data: jsonWriter.Bytes(), + }, nil } diff --git a/logsapi/metrics_test.go b/logsapi/metrics_test.go index 8de5ccfd..837276d7 100644 --- a/logsapi/metrics_test.go +++ b/logsapi/metrics_test.go @@ -20,7 +20,6 @@ package logsapi import ( "fmt" "log" - "strings" "testing" "time" @@ -32,10 +31,6 @@ import ( ) func TestProcessPlatformReport_Coldstart(t *testing.T) { - mc := apmproxy.MetadataContainer{ - Metadata: []byte(fmt.Sprintf(`{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"%s"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}`, extension.Version)), - } - timestamp := time.Now() pm := PlatformMetrics{ @@ -71,11 +66,9 @@ func TestProcessPlatformReport_Coldstart(t *testing.T) { }, } - desiredOutputMetadata := fmt.Sprintf(`{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"%s"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}`, extension.Version) - desiredOutputMetrics := fmt.Sprintf(`{"metricset":{"samples":{"faas.coldstart_duration":{"value":422.9700012207031},"faas.timeout":{"value":5000},"system.memory.total":{"value":1.34217728e+08},"system.memory.actual.free":{"value":5.4525952e+07},"faas.duration":{"value":182.42999267578125},"faas.billed_duration":{"value":183}},"timestamp":%d,"faas":{"coldstart":true,"execution":"6f7f0961f83442118a7af6fe80b88d56","id":"arn:aws:lambda:us-east-2:123456789012:function:custom-runtime"}}}`, timestamp.UnixNano()/1e3) - apmData, err := ProcessPlatformReport(&mc, &event, logEvent) + apmData, err := ProcessPlatformReport(&event, logEvent) require.NoError(t, err) assert.Equal(t, apmproxy.Lambda, apmData.Type) @@ -85,18 +78,10 @@ func TestProcessPlatformReport_Coldstart(t *testing.T) { out := string(requestBytes) log.Println(out) - processingResult := strings.Split(string(requestBytes), "\n") - - assert.JSONEq(t, desiredOutputMetadata, processingResult[0]) - assert.JSONEq(t, desiredOutputMetrics, processingResult[1]) + assert.JSONEq(t, desiredOutputMetrics, string(requestBytes)) } func TestProcessPlatformReport_NoColdstart(t *testing.T) { - - mc := apmproxy.MetadataContainer{ - Metadata: []byte(fmt.Sprintf(`{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"%s"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}`, extension.Version)), - } - timestamp := time.Now() pm := PlatformMetrics{ @@ -132,75 +117,19 @@ func TestProcessPlatformReport_NoColdstart(t *testing.T) { }, } - desiredOutputMetadata := fmt.Sprintf(`{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"%s"},"framework":{"name":"AWS 
Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}`, extension.Version) - desiredOutputMetrics := fmt.Sprintf(`{"metricset":{"samples":{"faas.coldstart_duration":{"value":0},"faas.timeout":{"value":5000},"system.memory.total":{"value":1.34217728e+08},"system.memory.actual.free":{"value":5.4525952e+07},"faas.duration":{"value":182.42999267578125},"faas.billed_duration":{"value":183}},"timestamp":%d,"faas":{"coldstart":false,"execution":"6f7f0961f83442118a7af6fe80b88d56","id":"arn:aws:lambda:us-east-2:123456789012:function:custom-runtime"}}}`, timestamp.UnixNano()/1e3) - apmData, err := ProcessPlatformReport(&mc, &event, logEvent) + apmData, err := ProcessPlatformReport(&event, logEvent) require.NoError(t, err) assert.Equal(t, apmproxy.Lambda, apmData.Type) requestBytes, err := apmproxy.GetUncompressedBytes(apmData.Data, "") require.NoError(t, err) - out := string(requestBytes) - log.Println(out) - - processingResult := strings.Split(string(requestBytes), "\n") - - assert.JSONEq(t, desiredOutputMetadata, processingResult[0]) - assert.JSONEq(t, desiredOutputMetrics, processingResult[1]) -} - -func TestProcessPlatformReport_DataCorruption(t *testing.T) { - // Allocate big capacity metadata array to prevent reallocation - raw := make([]byte, 0, 7000) - mc := &apmproxy.MetadataContainer{ - Metadata: append(raw, []byte(fmt.Sprintf(`{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"%s"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}`, extension.Version))...), - } - reqID := "8476a536-e9f4-11e8-9739-2dfe598c3fcd" - invokedFnArn := "arn:aws:lambda:us-east-2:123456789012:function:custom-runtime" - timestamp := time.Date(2022, 11, 12, 0, 0, 0, 0, time.UTC) - logEvent := LogEvent{ - Time: timestamp, - Type: PlatformReport, - Record: LogEventRecord{ - RequestID: reqID, - Metrics: PlatformMetrics{ - DurationMs: 1.0, - BilledDurationMs: 1, - MemorySizeMB: 1, - MaxMemoryUsedMB: 1, - InitDurationMs: 1.0, - }, - }, - } - nextEventResp := &extension.NextEventResponse{ - Timestamp: timestamp, - EventType: extension.Invoke, - RequestID: reqID, - InvokedFunctionArn: invokedFnArn, - } - expected := "{\"metricset\":{\"samples\":{\"system.memory.total\":{\"value\":1.048576e+06},\"system.memory.actual.free\":{\"value\":0},\"faas.duration\":{\"value\":1},\"faas.billed_duration\":{\"value\":1},\"faas.coldstart_duration\":{\"value\":1},\"faas.timeout\":{\"value\":-1.6682112e+12}},\"timestamp\":1668211200000000,\"faas\":{\"coldstart\":true,\"execution\":\"8476a536-e9f4-11e8-9739-2dfe598c3fcd\",\"id\":\"arn:aws:lambda:us-east-2:123456789012:function:custom-runtime\"}}}" - - agentData, err := ProcessPlatformReport(mc, nextEventResp, logEvent) - require.NoError(t, err) - assert.Equal(t, apmproxy.Lambda, agentData.Type) - - // process another platform report to ensure the previous payload is not corrupted - logEvent.Record.RequestID = "corrupt-req-id" - nextEventResp.RequestID = "corrupt-req-id" - _, err = ProcessPlatformReport(mc, nextEventResp, logEvent) - 
require.NoError(t, err) - - data := strings.Split(string(agentData.Data), "\n") - assert.JSONEq(t, expected, data[1]) + assert.JSONEq(t, desiredOutputMetrics, string(requestBytes)) } func BenchmarkPlatformReport(b *testing.B) { - mc := &apmproxy.MetadataContainer{ - Metadata: []byte(`{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"1.1.0"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}`), - } reqID := "8476a536-e9f4-11e8-9739-2dfe598c3fcd" invokedFnArn := "arn:aws:lambda:us-east-2:123456789012:function:custom-runtime" timestamp := time.Date(2022, 11, 12, 0, 0, 0, 0, time.UTC) @@ -226,7 +155,7 @@ func BenchmarkPlatformReport(b *testing.B) { } for n := 0; n < b.N; n++ { - _, err := ProcessPlatformReport(mc, nextEventResp, logEvent) + _, err := ProcessPlatformReport(nextEventResp, logEvent) require.NoError(b, err) } } From 3ff264be427b70994f496335bbe99663e5a6e48e Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Sun, 25 Sep 2022 15:43:34 +0800 Subject: [PATCH 03/15] Pushback on lambda logs API until metadata is available --- apmproxy/apmserver.go | 6 +++--- apmproxy/apmserver_test.go | 14 +++++++++++--- apmproxy/client.go | 5 +++-- apmproxy/option.go | 8 ++++++++ app/app.go | 3 +++ logsapi/client.go | 3 ++- logsapi/option.go | 8 ++++++++ logsapi/route_handlers.go | 16 +++++++++++++++- 8 files changed, 53 insertions(+), 10 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 0d9dcdca..86ced85c 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -324,6 +324,8 @@ func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData, batc return fmt.Errorf("failed to extract metadata from agent payload %w", err) } c.setMetadata(metadata) + // broadcast that metadata is available + close(c.metadataAvailable) } return c.PostToApmServer(ctx, apmData) case Lambda: @@ -340,10 +342,8 @@ func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData, batc } func (c *Client) sendBatch(ctx context.Context, batch *BatchData) error { - // TODO: @lahsivjar keep buffering logs in DataChannel until metadata - // is available. 
if len(c.metadata) == 0 { - return errors.New("metadata not yet populated, dropping data") + return errors.New("unexpected state, metadata should always be populated") } if batch.Size() == 0 { return nil diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index 2e313161..3bcc4d04 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -599,7 +599,13 @@ func TestForwardApmData(t *testing.T) { t.Cleanup(apmServer.Close) metadata := `{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"1.1.0"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}` assertGzipBody := func(expected string) { - buf := bytes.NewReader(<-receivedReqBodyChan) + var body []byte + select { + case body = <-receivedReqBodyChan: + case <-time.After(1 * time.Second): + require.Fail(t, "mock APM-Server timed out waiting for request") + } + buf := bytes.NewReader(body) r, err := gzip.NewReader(buf) require.NoError(t, err) out, err := io.ReadAll(r) @@ -608,13 +614,15 @@ func TestForwardApmData(t *testing.T) { } agentData := fmt.Sprintf("%s\n%s", metadata, `{"log": {"message": "test"}}`) lambdaData := `{"log": {"message": "test"}}` + metaAvailable := make(chan struct{}) apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(metaAvailable), + // Use unbuffered channel for ease of testing + apmproxy.WithAgentDataBufferSize(0), ) require.NoError(t, err) - // Override DataChannel to be unbuffered for ease of testing - apmClient.DataChannel = make(chan apmproxy.APMData) // Start forwarding APM data ctx, cancel := context.WithCancel(context.Background()) diff --git a/apmproxy/client.go b/apmproxy/client.go index 1269b2c9..3d2194d8 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -66,8 +66,9 @@ type Client struct { flushMutex sync.Mutex flushCh chan struct{} - metadataMutex sync.RWMutex - metadata []byte + metadataAvailable chan<- struct{} + metadataMutex sync.RWMutex + metadata []byte } func NewClient(opts ...Option) (*Client, error) { diff --git a/apmproxy/option.go b/apmproxy/option.go index 827ed218..8ec4ba75 100644 --- a/apmproxy/option.go +++ b/apmproxy/option.go @@ -83,3 +83,11 @@ func WithLogger(logger *zap.SugaredLogger) Option { c.logger = logger } } + +// WithMetadataAvailableIndicator configures a channel +// which will broadcast metadata available event on close +func WithMetadataAvailableIndicator(ch chan<- struct{}) Option { + return func(c *Client) { + c.metadataAvailable = ch + } +} diff --git a/app/app.go b/app/app.go index d2b9f808..e3667533 100644 --- a/app/app.go +++ b/app/app.go @@ -67,6 +67,7 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { return nil, err } + metadataAvailable := make(chan struct{}) app.extensionClient = extension.NewClient(c.awsLambdaRuntimeAPI, app.logger) if !c.disableLogsAPI { @@ -86,6 +87,7 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { logsapi.WithLogBuffer(100), logsapi.WithLogger(app.logger), logsapi.WithSubscriptionTypes(subscriptionLogStreams...), + logsapi.WithMetadataAvailableIndicator(metadataAvailable), ) if err != nil { return nil, err @@ -132,6 +134,7 @@ func New(ctx context.Context, 
opts ...ConfigOption) (*App, error) { apmproxy.WithLogger(app.logger), apmproxy.WithAPIKey(apmServerAPIKey), apmproxy.WithSecretToken(apmServerSecretToken), + apmproxy.WithMetadataAvailableIndicator(metadataAvailable), ) ac, err := apmproxy.NewClient(apmOpts...) diff --git a/logsapi/client.go b/logsapi/client.go index b7d18ac0..b0d7bcf6 100644 --- a/logsapi/client.go +++ b/logsapi/client.go @@ -52,6 +52,7 @@ type Client struct { logsAPIBaseURL string logsAPISubscriptionTypes []SubscriptionType logsChannel chan LogEvent + metadataAvailable <-chan struct{} listenerAddr string server *http.Server logger *zap.SugaredLogger @@ -69,7 +70,7 @@ func NewClient(opts ...ClientOption) (*Client, error) { } mux := http.NewServeMux() - mux.HandleFunc("/", handleLogEventsRequest(c.logger, c.logsChannel)) + mux.HandleFunc("/", handleLogEventsRequest(c.logger, c.logsChannel, c.metadataAvailable)) c.server.Handler = mux diff --git a/logsapi/option.go b/logsapi/option.go index e431477e..677cbf4d 100644 --- a/logsapi/option.go +++ b/logsapi/option.go @@ -55,3 +55,11 @@ func WithSubscriptionTypes(types ...SubscriptionType) ClientOption { c.logsAPISubscriptionTypes = types } } + +// WithMetadataAvailableIndicator configures a channel +// which will broadcast metadata available event on close +func WithMetadataAvailableIndicator(ch <-chan struct{}) ClientOption { + return func(c *Client) { + c.metadataAvailable = ch + } +} diff --git a/logsapi/route_handlers.go b/logsapi/route_handlers.go index 0981d497..7c10a956 100644 --- a/logsapi/route_handlers.go +++ b/logsapi/route_handlers.go @@ -25,8 +25,22 @@ import ( "go.uber.org/zap" ) -func handleLogEventsRequest(logger *zap.SugaredLogger, logsChannel chan LogEvent) func(w http.ResponseWriter, r *http.Request) { +func handleLogEventsRequest( + logger *zap.SugaredLogger, + logsChannel chan LogEvent, + metadataAvailable <-chan struct{}, +) func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) { + select { + case <-metadataAvailable: + case <-time.After(100 * time.Millisecond): + // Without metadata the events cannot be processed + // thus we indicate lambda to keep the buffer + logger.Warnf("Metadata unavailable, signaling lambda to buffer log events") + w.WriteHeader(http.StatusInternalServerError) + return + } + var logEvents []LogEvent if err := json.NewDecoder(r.Body).Decode(&logEvents); err != nil { logger.Errorf("Error unmarshalling log events: %+v", err) From d5d624edc69effdf848ce5b7fb37c2e84c18a7fd Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Mon, 26 Sep 2022 17:39:34 +0800 Subject: [PATCH 04/15] Tweak default buffering settings --- logsapi/subscribe.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logsapi/subscribe.go b/logsapi/subscribe.go index 9e96a096..c13cadcb 100644 --- a/logsapi/subscribe.go +++ b/logsapi/subscribe.go @@ -86,8 +86,8 @@ func (lc *Client) subscribe(types []SubscriptionType, extensionID string, uri st LogTypes: types, BufferingCfg: BufferingCfg{ MaxItems: 10000, - MaxBytes: 262144, - TimeoutMS: 25, + MaxBytes: 1024 * 1024, + TimeoutMS: 100, }, Destination: Destination{ Protocol: "HTTP", From 3d3c1b9b633159dbf062ea4051f2255d9989929c Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Tue, 27 Sep 2022 11:35:15 +0800 Subject: [PATCH 05/15] Refactor metadata handling and add age to batch --- apmproxy/apmserver.go | 72 ++++++++++------------------- apmproxy/apmserver_test.go | 25 ++++++++++ apmproxy/batch.go | 93 ++++++++++++++++++++++++++++---------- apmproxy/batch_test.go | 51 
+++++++++++++++------ apmproxy/client.go | 14 +++++- apmproxy/client_test.go | 14 ++++-- apmproxy/receiver_test.go | 9 +++- app/app.go | 10 ++-- app/config.go | 9 ++++ logsapi/client_test.go | 33 +++++++++++--- logsapi/event.go | 5 +- main.go | 1 + main_test.go | 4 ++ 13 files changed, 235 insertions(+), 105 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 86ced85c..40bf5cd6 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -31,8 +31,6 @@ import ( "time" ) -const defaultBatchSize = 20 - type jsonResult struct { Errors []jsonError `json:"errors,omitempty"` } @@ -49,14 +47,13 @@ func (c *Client) ForwardApmData(ctx context.Context) error { if c.IsUnhealthy() { return nil } - batch := NewBatch(defaultBatchSize) for { select { case <-ctx.Done(): c.logger.Debug("Invocation context cancelled, not processing any more agent data") - return c.sendBatch(ctx, batch) + return c.sendBatch(ctx) case apmData := <-c.DataChannel: - if err := c.forwardAPMDataByType(ctx, apmData, batch); err != nil { + if err := c.forwardAPMDataByType(ctx, apmData); err != nil { return fmt.Errorf("error sending to APM server, skipping: %v", err) } } @@ -70,18 +67,17 @@ func (c *Client) FlushAPMData(ctx context.Context) { return } c.logger.Debug("Flush started - Checking for agent data") - batch := NewBatch(defaultBatchSize) for { select { case apmData := <-c.DataChannel: c.logger.Debug("Flush in progress - Processing agent data") - if err := c.forwardAPMDataByType(ctx, apmData, batch); err != nil { + if err := c.forwardAPMDataByType(ctx, apmData); err != nil { c.logger.Errorf("Error sending to APM server, skipping: %v", err) } default: c.logger.Debug("Flush ended - No agent data on buffer") // Flush any remaining data in batch - if err := c.sendBatch(ctx, batch); err != nil { + if err := c.sendBatch(ctx); err != nil { c.logger.Errorf("Error sending to APM server, skipping: %v", err) } return @@ -282,12 +278,12 @@ func (c *Client) ComputeGracePeriod() time.Duration { return time.Duration((gracePeriodWithoutJitter + jitter*gracePeriodWithoutJitter) * float64(time.Second)) } -// EnqueueAPMData adds a AgentData struct to the agent data channel, effectively queueing for a send +// EnqueueAPMData adds a apm data struct to the agent data channel, effectively queueing for a send // to the APM server. 
-func (c *Client) EnqueueAPMData(agentData APMData) { +func (c *Client) EnqueueAPMData(apmData APMData) { select { - case c.DataChannel <- agentData: - c.logger.Debug("Adding agent data to buffer to be sent to apm server") + case c.DataChannel <- apmData: + c.logger.Debug("Adding APM data to buffer to be sent to apm server") default: c.logger.Warn("Channel full: dropping a subset of agent data") } @@ -315,25 +311,30 @@ func (c *Client) WaitForFlush() <-chan struct{} { return c.flushCh } -func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData, batch *BatchData) error { +func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData) error { switch apmData.Type { case Agent: - if len(c.metadata) == 0 { + if c.batch == nil { metadata, err := ProcessMetadata(apmData) if err != nil { return fmt.Errorf("failed to extract metadata from agent payload %w", err) } - c.setMetadata(metadata) + c.batch = NewBatch(c.maxBatchSize, metadata) // broadcast that metadata is available close(c.metadataAvailable) } return c.PostToApmServer(ctx, apmData) case Lambda: - if err := batch.Add(apmData); err != nil { + if c.batch == nil { + // This state is not possible since we are pushing back on + // lambda logs API until metadata is available. + return errors.New("unexpected state, metadata not yet set") + } + if err := c.batch.Add(apmData); err != nil { c.logger.Warnf("Dropping data due to error: %v", err) } - if batch.ShouldFlush() { - return c.sendBatch(ctx, batch) + if c.batch.ShouldShip() { + return c.sendBatch(ctx) } return nil default: @@ -341,37 +342,10 @@ func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData, batc } } -func (c *Client) sendBatch(ctx context.Context, batch *BatchData) error { - if len(c.metadata) == 0 { - return errors.New("unexpected state, metadata should always be populated") - } - if batch.Size() == 0 { +func (c *Client) sendBatch(ctx context.Context) error { + if c.batch == nil || c.batch.Count() == 0 { return nil } - defer batch.Reset() - return c.PostToApmServer(ctx, c.getWithMetadata(batch)) -} - -func (c *Client) setMetadata(metadata []byte) { - c.metadataMutex.Lock() - defer c.metadataMutex.Unlock() - c.metadata = metadata -} - -func (c *Client) getWithMetadata(batch *BatchData) APMData { - c.metadataMutex.RLock() - defer c.metadataMutex.RUnlock() - - capacity := len(c.metadata) + 1 - for _, d := range batch.agentData { - capacity += len(d.Data) + 1 - } - - withMeta := make([]byte, len(c.metadata), capacity) - copy(withMeta, c.metadata) - for _, d := range batch.agentData { - withMeta = append(withMeta, '\n') - withMeta = append(withMeta, d.Data...) 
- } - return APMData{Data: withMeta} + defer c.batch.Reset() + return c.PostToApmServer(ctx, c.batch.ToAPMData()) } diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index 3bcc4d04..604faaab 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -78,6 +78,7 @@ func TestPostToApmServerDataCompressed(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.PostToApmServer(context.Background(), agentData)) @@ -124,6 +125,7 @@ func TestPostToApmServerDataNotCompressed(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.PostToApmServer(context.Background(), agentData)) @@ -133,6 +135,7 @@ func TestGracePeriod(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -173,6 +176,7 @@ func TestSetHealthyTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -186,6 +190,7 @@ func TestSetFailingTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.ReconnectionCount = 0 @@ -198,6 +203,7 @@ func TestSetPendingTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -213,6 +219,7 @@ func TestSetPendingTransportExplicitly(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -225,6 +232,7 @@ func TestSetInvalidTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -269,6 +277,7 @@ func TestEnterBackoffFromHealthy(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -323,6 +332,7 @@ func TestEnterBackoffFromFailing(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + 
apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -377,6 +387,7 @@ func TestAPMServerRecovery(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -423,6 +434,7 @@ func TestAPMServerAuthFails(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -474,6 +486,7 @@ func TestAPMServerRatelimit(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) assert.Equal(t, apmClient.Status, apmproxy.Started) @@ -527,6 +540,7 @@ func TestAPMServerClientFail(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) assert.Equal(t, apmClient.Status, apmproxy.Started) @@ -577,6 +591,7 @@ func TestContinuedAPMServerFailure(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -639,6 +654,14 @@ func TestForwardApmData(t *testing.T) { Type: apmproxy.Agent, } assertGzipBody(agentData) + // After reading agent data with metadata the metadata indicator + // channel should be closed + select { + case _, ok := <-metaAvailable: + require.False(t, ok) + default: + require.Fail(t, "meta channel should be closed after reading agent data") + } // Send lambda logs API data var expected bytes.Buffer @@ -679,6 +702,7 @@ func BenchmarkFlushAPMData(b *testing.B) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(b).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(b, err) @@ -725,6 +749,7 @@ func BenchmarkPostToAPM(b *testing.B) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(b).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(b, err) diff --git a/apmproxy/batch.go b/apmproxy/batch.go index a56ad926..7d2400a8 100644 --- a/apmproxy/batch.go +++ b/apmproxy/batch.go @@ -17,54 +17,101 @@ package apmproxy -import "errors" +import ( + "bytes" + "errors" + "time" +) -// ErrBatchFull signfies that the batch has reached full -// capacity and cannot accept more entires. -var ErrBatchFull = errors.New("batch is full") +var ( + // ErrBatchFull signfies that the batch has reached full capacity + // and cannot accept more entires. 
+ ErrBatchFull = errors.New("batch is full") + // ErrInvalidType is returned for any APMData that is not Lambda type + ErrInvalidType = errors.New("only accepts lambda type data") + // ErrInvalidEncoding is returned for any APMData that is encoded + // with any encoding format + ErrInvalidEncoding = errors.New("encoded data not supported") +) + +var ( + maxSizeThreshold = 0.9 +) // BatchData represents a batch of data without metadata // that will be sent to APMServer. BatchData is not safe // concurrent access. type BatchData struct { - agentData []APMData - maxSize int + metadataBytes int + buf bytes.Buffer + count int + maxSize int + maxAgeInSeconds int + age int64 } // NewBatch creates a new BatchData which can accept a // maximum number of entires as specified by the argument -func NewBatch(maxSize int) *BatchData { - return &BatchData{ - maxSize: maxSize, - agentData: make([]APMData, 0, maxSize), +func NewBatch(maxSize int, maxAgeInSeconds int, metadata []byte) *BatchData { + b := &BatchData{ + maxSize: maxSize, + maxAgeInSeconds: maxAgeInSeconds, } + b.metadataBytes, _ = b.buf.Write(metadata) + return b } // Add adds a new entry to the batch. Returns ErrBatchFull // if batch has reached it's maximum size. func (b *BatchData) Add(d APMData) error { - if len(b.agentData) >= b.maxSize { + if b.count >= b.maxSize { return ErrBatchFull } - - b.agentData = append(b.agentData, d) + if d.Type != Lambda { + return ErrInvalidType + } + if d.ContentEncoding != "" { + return ErrInvalidEncoding + } + if err := b.buf.WriteByte('\n'); err != nil { + return err + } + if _, err := b.buf.Write(d.Data); err != nil { + return err + } + if b.count == 0 { + // For first entry, set the age of the batch + b.age = time.Now().Unix() + } + b.count++ return nil } -// Size return the number of entries in batch. -func (b *BatchData) Size() int { - return len(b.agentData) +// Count return the number of APMData entries in batch. +func (b *BatchData) Count() int { + return b.count } -// ShouldFlush indicates when a batch is ready for flush. -// A batch is marked as ready for flush once it reaches -// 90% of its max size. -func (b *BatchData) ShouldFlush() bool { - return len(b.agentData) >= int(float32(b.maxSize)*0.9) +// ShouldShip indicates when a batch is ready for sending. +// A batch is marked as ready for flush when one of the +// below conditions is reached: +// 1. max size is greater than threshold (90% of maxSize) +// 2. 
batch is older than maturity age +func (b *BatchData) ShouldShip() bool { + return b.count >= int(float64(b.maxSize)*maxSizeThreshold) || + (b.age > 0 && time.Now().Unix()-b.age > int64(b.maxAgeInSeconds)) } // Reset resets the batch to prepare for new set of data func (b *BatchData) Reset() { - b.agentData = nil - b.agentData = make([]APMData, 0, b.maxSize) + b.count, b.age = 0, 0 + b.buf.Truncate(b.metadataBytes) +} + +// ToAPMData returns APMData with metadata and the accumulated batch +func (b *BatchData) ToAPMData() APMData { + return APMData{ + Data: b.buf.Bytes(), + Type: Lambda, + } } diff --git a/apmproxy/batch_test.go b/apmproxy/batch_test.go index f71b7de9..b58f5787 100644 --- a/apmproxy/batch_test.go +++ b/apmproxy/batch_test.go @@ -19,43 +19,64 @@ package apmproxy import ( "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +const metadata = `{"metadata":{"service":{"agent":{"name":"apm-lambda-extension","version":"1.1.0"},"framework":{"name":"AWS Lambda","version":""},"language":{"name":"python","version":"3.9.8"},"runtime":{"name":"","version":""},"node":{}},"user":{},"process":{"pid":0},"system":{"container":{"id":""},"kubernetes":{"node":{},"pod":{}}},"cloud":{"provider":"","instance":{},"machine":{},"account":{},"project":{},"service":{}}}}` + func TestAdd(t *testing.T) { t.Run("empty", func(t *testing.T) { - b := NewBatch(1) + b := NewBatch(1, 100, []byte(metadata)) - assert.NoError(t, b.Add(APMData{})) + assert.NoError(t, b.Add(APMData{Type: Lambda})) }) t.Run("full", func(t *testing.T) { - b := NewBatch(1) - require.NoError(t, b.Add(APMData{})) + b := NewBatch(1, 100, []byte(metadata)) + require.NoError(t, b.Add(APMData{Type: Lambda})) - assert.ErrorIs(t, ErrBatchFull, b.Add(APMData{})) + assert.ErrorIs(t, ErrBatchFull, b.Add(APMData{Type: Lambda})) }) } func TestReset(t *testing.T) { - b := NewBatch(1) - require.NoError(t, b.Add(APMData{})) - require.Equal(t, 1, b.Size()) + b := NewBatch(1, 100, []byte(metadata)) + require.NoError(t, b.Add(APMData{Type: Lambda})) + require.Equal(t, 1, b.Count()) b.Reset() - assert.Equal(t, 0, b.Size()) + assert.Equal(t, 0, b.Count()) + assert.Equal(t, 0, b.age) } -func TestShouldFlush(t *testing.T) { - b := NewBatch(10) +func TestShouldShip_ReasonSize(t *testing.T) { + b := NewBatch(10, 100, []byte(metadata)) // Should flush at 90% full for i := 0; i < 9; i++ { - assert.False(t, b.ShouldFlush()) - require.NoError(t, b.Add(APMData{})) + assert.False(t, b.ShouldShip()) + require.NoError(t, b.Add(APMData{Type: Lambda})) + } + + require.Equal(t, 9, b.Count()) + assert.True(t, b.ShouldShip()) +} + +func TestShouldShip_ReasonAge(t *testing.T) { + if testing.Short() { + t.Skip("skipping testing in short mode") } - require.Equal(t, 9, b.Size()) - assert.True(t, b.ShouldFlush()) + b := NewBatch(10, 10, []byte(metadata)) + + assert.False(t, b.ShouldShip()) + require.NoError(t, b.Add(APMData{Type: Lambda})) + + <-time.After(11 * time.Second) + + // Should flush after 10 seconds + require.Equal(t, 1, b.Count()) + assert.True(t, b.ShouldShip()) } diff --git a/apmproxy/client.go b/apmproxy/client.go index 3d2194d8..b5a44fcc 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -46,6 +46,8 @@ const ( defaultDataForwarderTimeout time.Duration = 3 * time.Second defaultReceiverAddr = ":8200" defaultAgentBufferSize int = 100 + defaultMaxBatchSize int = 50 + defaultMaxBatchAge int = 10 ) // Client is the client used to communicate with the apm server. 
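// The BatchData introduced above keeps everything in a single bytes.Buffer:
// the metadata document is written once at construction, every Add appends a
// newline followed by one intake event (the newline-delimited JSON framing the
// intake/v2 endpoint expects), and Reset truncates back to the metadata prefix
// so it is never copied again. A minimal sketch of the buffer lifecycle,
// assuming metadata and event each hold a single ND-JSON line:
//
//	var buf bytes.Buffer
//	n, _ := buf.Write(metadata) // metadata is always the first line
//	buf.WriteByte('\n')
//	buf.Write(event)            // one event per line
//	buf.Truncate(n)             // Reset: drop events, keep the metadata prefix
//
// ShouldShip then marks the batch ready once it holds 90% of maxSize entries
// or once the first entry is older than the configured maximum age.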
@@ -66,9 +68,11 @@ type Client struct { flushMutex sync.Mutex flushCh chan struct{} + batch *BatchData + maxBatchSize int + maxBatchAge int + metadataAvailable chan<- struct{} - metadataMutex sync.RWMutex - metadata []byte } func NewClient(opts ...Option) (*Client, error) { @@ -90,6 +94,8 @@ func NewClient(opts ...Option) (*Client, error) { }, sendStrategy: SyncFlush, flushCh: make(chan struct{}), + maxBatchSize: defaultMaxBatchSize, + maxBatchAge: defaultMaxBatchAge, } c.client.Timeout = defaultDataForwarderTimeout @@ -106,6 +112,10 @@ func NewClient(opts ...Option) (*Client, error) { return nil, errors.New("logger cannot be empty") } + if c.metadataAvailable == nil { + return nil, errors.New("metadata available indicator is required") + } + // normalize server URL if !strings.HasSuffix(c.serverURL, "/") { c.serverURL = c.serverURL + "/" diff --git a/apmproxy/client_test.go b/apmproxy/client_test.go index 8dd75d36..3acab6db 100644 --- a/apmproxy/client_test.go +++ b/apmproxy/client_test.go @@ -18,9 +18,10 @@ package apmproxy_test import ( - "github.com/elastic/apm-aws-lambda/apmproxy" "testing" + "github.com/elastic/apm-aws-lambda/apmproxy" + "github.com/stretchr/testify/require" "go.uber.org/zap/zaptest" ) @@ -34,15 +35,19 @@ func TestClient(t *testing.T) { expectedErr: true, }, "missing base url": { + opts: []apmproxy.Option{}, + expectedErr: true, + }, + "missing logger": { opts: []apmproxy.Option{ - apmproxy.WithURL(""), - apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithURL("https://example.com"), }, expectedErr: true, }, - "missing logger": { + "missing metadata indicator": { opts: []apmproxy.Option{ apmproxy.WithURL("https://example.com"), + apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), }, expectedErr: true, }, @@ -50,6 +55,7 @@ func TestClient(t *testing.T) { opts: []apmproxy.Option{ apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), }, }, } diff --git a/apmproxy/receiver_test.go b/apmproxy/receiver_test.go index a66de72c..2179a1cc 100644 --- a/apmproxy/receiver_test.go +++ b/apmproxy/receiver_test.go @@ -19,7 +19,6 @@ package apmproxy_test import ( "bytes" - "github.com/elastic/apm-aws-lambda/apmproxy" "io" "net" "net/http" @@ -28,6 +27,8 @@ import ( "testing" "time" + "github.com/elastic/apm-aws-lambda/apmproxy" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap/zaptest" @@ -57,6 +58,7 @@ func TestInfoProxy(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -102,6 +104,7 @@ func TestInfoProxyErrorStatusCode(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -140,6 +143,7 @@ func Test_handleInfoRequest(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -180,6 +184,7 @@ func Test_handleIntakeV2EventsQueryParam(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), 
apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.StartReceiver()) @@ -222,6 +227,7 @@ func Test_handleIntakeV2EventsNoQueryParam(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.StartReceiver()) @@ -265,6 +271,7 @@ func Test_handleIntakeV2EventsQueryParamEmptyData(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), + apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.StartReceiver()) diff --git a/app/app.go b/app/app.go index e3667533..9d39bffb 100644 --- a/app/app.go +++ b/app/app.go @@ -19,6 +19,7 @@ package app import ( "context" + "errors" "fmt" "os" "strconv" @@ -52,6 +53,10 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { opt(&c) } + if c.metadataAvailable == nil { + return nil, errors.New("metadata available indicator is required") + } + app := &App{ extensionName: c.extensionName, } @@ -67,7 +72,6 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { return nil, err } - metadataAvailable := make(chan struct{}) app.extensionClient = extension.NewClient(c.awsLambdaRuntimeAPI, app.logger) if !c.disableLogsAPI { @@ -87,7 +91,7 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { logsapi.WithLogBuffer(100), logsapi.WithLogger(app.logger), logsapi.WithSubscriptionTypes(subscriptionLogStreams...), - logsapi.WithMetadataAvailableIndicator(metadataAvailable), + logsapi.WithMetadataAvailableIndicator(c.metadataAvailable), ) if err != nil { return nil, err @@ -134,7 +138,7 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { apmproxy.WithLogger(app.logger), apmproxy.WithAPIKey(apmServerAPIKey), apmproxy.WithSecretToken(apmServerSecretToken), - apmproxy.WithMetadataAvailableIndicator(metadataAvailable), + apmproxy.WithMetadataAvailableIndicator(c.metadataAvailable), ) ac, err := apmproxy.NewClient(apmOpts...) diff --git a/app/config.go b/app/config.go index 4311de5a..c34bcf61 100644 --- a/app/config.go +++ b/app/config.go @@ -27,6 +27,7 @@ type appConfig struct { enableFunctionLogSubscription bool logLevel string logsapiAddr string + metadataAvailable chan struct{} } // ConfigOption is used to configure the lambda extension @@ -85,3 +86,11 @@ func WithAWSConfig(awsConfig aws.Config) ConfigOption { c.awsConfig = awsConfig } } + +// WithMetadataAvailableIndicator configures a channel which should +// be closed when metadata is available after parsing agent data. 
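// The channel is created once and handed to both clients built in app.New:
// the apmproxy client closes it after extracting metadata from the first
// agent payload, and the logs API handler waits on it before accepting log
// events. A hedged wiring sketch, mirroring what main.go does later in this
// patch (lambdaApp is an illustrative name):
//
//	metadataAvailable := make(chan struct{})
//	lambdaApp, err := app.New(ctx,
//		app.WithMetadataAvailableIndicator(metadataAvailable),
//		// ...remaining options as in main.go...
//	)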
+func WithMetadataAvailableIndicator(ch chan struct{}) ConfigOption { + return func(c *appConfig) { + c.metadataAvailable = ch + } +} diff --git a/logsapi/client_test.go b/logsapi/client_test.go index b29d8cb8..175f16f4 100644 --- a/logsapi/client_test.go +++ b/logsapi/client_test.go @@ -123,13 +123,32 @@ func TestSubscribeAWSRequest(t *testing.T) { addr := "localhost:8080" testCases := map[string]struct { - opts []logsapi.ClientOption + getOpts func() []logsapi.ClientOption + expectedRespCode int }{ - "valid response": { - opts: []logsapi.ClientOption{ - logsapi.WithListenerAddress(addr), - logsapi.WithLogger(zaptest.NewLogger(t).Sugar()), + "before_metadata_available": { + getOpts: func() []logsapi.ClientOption { + metaAvailable := make(chan struct{}) + return []logsapi.ClientOption{ + logsapi.WithListenerAddress(addr), + logsapi.WithLogger(zaptest.NewLogger(t).Sugar()), + logsapi.WithMetadataAvailableIndicator(metaAvailable), + } + }, + expectedRespCode: http.StatusInternalServerError, + }, + "after_metadata_available": { + getOpts: func() []logsapi.ClientOption { + metaAvailable := make(chan struct{}) + // Close the channel to signal metadata is available + defer close(metaAvailable) + return []logsapi.ClientOption{ + logsapi.WithListenerAddress(addr), + logsapi.WithLogger(zaptest.NewLogger(t).Sugar()), + logsapi.WithMetadataAvailableIndicator(metaAvailable), + } }, + expectedRespCode: http.StatusOK, }, } for name, tc := range testCases { @@ -144,7 +163,7 @@ func TestSubscribeAWSRequest(t *testing.T) { defer s.Close() cOpts := append( - tc.opts, + tc.getOpts(), logsapi.WithLogsAPIBaseURL(s.URL), logsapi.WithLogBuffer(1), logsapi.WithSubscriptionTypes(logsapi.Platform, logsapi.Function), @@ -169,7 +188,7 @@ func TestSubscribeAWSRequest(t *testing.T) { // Send the request to the logs listener rsp, err := http.DefaultClient.Do(req) require.NoError(t, err) - require.Equal(t, http.StatusOK, rsp.StatusCode) + require.Equal(t, tc.expectedRespCode, rsp.StatusCode) require.NoError(t, rsp.Body.Close()) require.NoError(t, c.Shutdown()) }) diff --git a/logsapi/event.go b/logsapi/event.go index 0a23e316..c961982e 100644 --- a/logsapi/event.go +++ b/logsapi/event.go @@ -33,6 +33,7 @@ const ( PlatformRuntimeDone LogEventType = "platform.runtimeDone" PlatformFault LogEventType = "platform.fault" PlatformReport LogEventType = "platform.report" + PlatformLogsDropped LogEventType = "platform.logsDropped" PlatformStart LogEventType = "platform.start" PlatformEnd LogEventType = "platform.end" FunctionLog LogEventType = "function" @@ -90,7 +91,7 @@ func (lc *Client) ProcessLogs( lc.logger.Debug("Received platform report for the previous function invocation") processedMetrics, err := ProcessPlatformReport(prevEvent, logEvent) if err != nil { - lc.logger.Errorf("Error processing Lambda platform metrics : %v", err) + lc.logger.Errorf("Error processing Lambda platform metrics: %v", err) } else { apmClient.EnqueueAPMData(processedMetrics) } @@ -98,6 +99,8 @@ func (lc *Client) ProcessLogs( lc.logger.Warn("report event request id didn't match the previous event id") lc.logger.Debug("Log API runtimeDone event request id didn't match") } + case PlatformLogsDropped: + lc.logger.Warn("Logs dropped due to extension falling behind: %v", logEvent.Record) case FunctionLog: lc.logger.Debug("Received function log") processedLog, err := ProcessFunctionLog( diff --git a/main.go b/main.go index 6e43626e..0a53c2c3 100644 --- a/main.go +++ b/main.go @@ -45,6 +45,7 @@ func main() { 
app.WithLambdaRuntimeAPI(os.Getenv("AWS_LAMBDA_RUNTIME_API")), app.WithLogLevel(os.Getenv("ELASTIC_APM_LOG_LEVEL")), app.WithAWSConfig(cfg), + app.WithMetadataAvailableIndicator(make(chan struct{})), } captureLogs, err := strconv.ParseBool(os.Getenv("ELASTIC_APM_LAMBDA_CAPTURE_LOGS")) diff --git a/main_test.go b/main_test.go index 72609e82..e31579c3 100644 --- a/main_test.go +++ b/main_test.go @@ -513,6 +513,8 @@ func TestAPMServerHangs(t *testing.T) { newMockLambdaServer(t, logsapiAddr, eventsChannel, l) eventsChain := []MockEvent{ + // The first response sets the metadata so that the lambda logs handler is accepting requests + {Type: InvokeStandard, APMServerBehavior: TimelyResponse, ExecutionDuration: 1, Timeout: 5}, {Type: InvokeStandard, APMServerBehavior: Hangs, ExecutionDuration: 1, Timeout: 500}, } eventQueueGenerator(eventsChain, eventsChannel) @@ -772,12 +774,14 @@ func TestMetricsWithMetadata(t *testing.T) { func runApp(t *testing.T, logsapiAddr string) <-chan struct{} { ctx, cancel := context.WithCancel(context.Background()) + metadataIndicator := make(chan struct{}) app, err := app.New(ctx, app.WithExtensionName("apm-lambda-extension"), app.WithLambdaRuntimeAPI(os.Getenv("AWS_LAMBDA_RUNTIME_API")), app.WithLogLevel("debug"), app.WithLogsapiAddress(logsapiAddr), + app.WithMetadataAvailableIndicator(metadataIndicator), ) require.NoError(t, err) From e42a3df3c60b85519101d094ceaa3b543fbfe315 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 29 Sep 2022 10:57:42 +0800 Subject: [PATCH 06/15] Fix batch creation and main_test logic --- apmproxy/apmserver.go | 2 +- apmproxy/batch.go | 6 +++--- apmproxy/batch_test.go | 12 ++++-------- main_test.go | 2 +- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 40bf5cd6..dbbbb794 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -319,7 +319,7 @@ func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData) erro if err != nil { return fmt.Errorf("failed to extract metadata from agent payload %w", err) } - c.batch = NewBatch(c.maxBatchSize, metadata) + c.batch = NewBatch(c.maxBatchSize, c.maxBatchAge, metadata) // broadcast that metadata is available close(c.metadataAvailable) } diff --git a/apmproxy/batch.go b/apmproxy/batch.go index 7d2400a8..37860340 100644 --- a/apmproxy/batch.go +++ b/apmproxy/batch.go @@ -25,7 +25,7 @@ import ( var ( // ErrBatchFull signfies that the batch has reached full capacity - // and cannot accept more entires. + // and cannot accept more entries. ErrBatchFull = errors.New("batch is full") // ErrInvalidType is returned for any APMData that is not Lambda type ErrInvalidType = errors.New("only accepts lambda type data") @@ -62,9 +62,9 @@ func NewBatch(maxSize int, maxAgeInSeconds int, metadata []byte) *BatchData { } // Add adds a new entry to the batch. Returns ErrBatchFull -// if batch has reached it's maximum size. +// if batch has reached its maximum size. 
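// The batch's sentinel errors are meant to be matched with errors.Is, as the
// tests do via assert.ErrorIs. A hedged caller-side sketch (shipAndReset is a
// hypothetical helper, not part of this change):
//
//	if err := b.Add(d); errors.Is(err, ErrBatchFull) {
//		shipAndReset(b) // send the current batch, then retry the Add
//	}
//
// Non-Lambda payloads and already-encoded payloads are rejected the same way,
// with ErrInvalidType and ErrInvalidEncoding respectively.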
func (b *BatchData) Add(d APMData) error { - if b.count >= b.maxSize { + if b.count == b.maxSize { return ErrBatchFull } if d.Type != Lambda { diff --git a/apmproxy/batch_test.go b/apmproxy/batch_test.go index b58f5787..b8e6d7c3 100644 --- a/apmproxy/batch_test.go +++ b/apmproxy/batch_test.go @@ -48,7 +48,7 @@ func TestReset(t *testing.T) { b.Reset() assert.Equal(t, 0, b.Count()) - assert.Equal(t, 0, b.age) + assert.Equal(t, int64(0), b.age) } func TestShouldShip_ReasonSize(t *testing.T) { @@ -65,18 +65,14 @@ func TestShouldShip_ReasonSize(t *testing.T) { } func TestShouldShip_ReasonAge(t *testing.T) { - if testing.Short() { - t.Skip("skipping testing in short mode") - } - - b := NewBatch(10, 10, []byte(metadata)) + b := NewBatch(10, 2, []byte(metadata)) assert.False(t, b.ShouldShip()) require.NoError(t, b.Add(APMData{Type: Lambda})) - <-time.After(11 * time.Second) + <-time.After(3 * time.Second) - // Should flush after 10 seconds + // Should be ready for send after 3 seconds require.Equal(t, 1, b.Count()) assert.True(t, b.ShouldShip()) } diff --git a/main_test.go b/main_test.go index e31579c3..385441d6 100644 --- a/main_test.go +++ b/main_test.go @@ -694,7 +694,7 @@ func TestInfoRequestHangs(t *testing.T) { lambdaServerInternals := newMockLambdaServer(t, logsapiAddr, eventsChannel, l) eventsChain := []MockEvent{ - {Type: InvokeStandardInfo, APMServerBehavior: Hangs, ExecutionDuration: 1, Timeout: 500}, + {Type: InvokeStandardInfo, APMServerBehavior: Hangs, ExecutionDuration: 1, Timeout: 5}, } eventQueueGenerator(eventsChain, eventsChannel) select { From 632d3953ab63074a31e6d34748c658cf44c68ee9 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 29 Sep 2022 15:21:18 +0800 Subject: [PATCH 07/15] Address review comments --- apmproxy/apmserver.go | 3 ++- apmproxy/apmserver_test.go | 13 +++++++++---- apmproxy/batch.go | 27 ++++++++++++++------------- apmproxy/batch_test.go | 14 +++++++------- apmproxy/client.go | 4 ++-- apmproxy/option.go | 24 ++++++++++++++++++++++++ logsapi/event.go | 2 +- 7 files changed, 59 insertions(+), 28 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index dbbbb794..d32ba97f 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -51,7 +51,8 @@ func (c *Client) ForwardApmData(ctx context.Context) error { select { case <-ctx.Done(): c.logger.Debug("Invocation context cancelled, not processing any more agent data") - return c.sendBatch(ctx) + // Any remaining data in the batch will be handled by FlushAPMData + return nil case apmData := <-c.DataChannel: if err := c.forwardAPMDataByType(ctx, apmData); err != nil { return fmt.Errorf("error sending to APM server, skipping: %v", err) diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index 604faaab..f040afa3 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -630,12 +630,14 @@ func TestForwardApmData(t *testing.T) { agentData := fmt.Sprintf("%s\n%s", metadata, `{"log": {"message": "test"}}`) lambdaData := `{"log": {"message": "test"}}` metaAvailable := make(chan struct{}) + maxBatchAge := 500 * time.Millisecond apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), apmproxy.WithMetadataAvailableIndicator(metaAvailable), - // Use unbuffered channel for ease of testing - apmproxy.WithAgentDataBufferSize(0), + apmproxy.WithAgentDataBufferSize(10), + // Configure a small batch age for ease of testing + apmproxy.WithMaxBatchAge(maxBatchAge), ) require.NoError(t, err) @@ -668,6 +670,10 
@@ func TestForwardApmData(t *testing.T) { expected.WriteString(metadata) // Send multiple lambda logs to batch data for i := 0; i < 5; i++ { + if i == 4 { + // Wait for batch age to make sure the batch is mature to be sent + <-time.After(maxBatchAge + time.Millisecond) + } apmClient.DataChannel <- apmproxy.APMData{ Data: []byte(lambdaData), Type: apmproxy.Lambda, @@ -676,10 +682,9 @@ func TestForwardApmData(t *testing.T) { expected.WriteString(lambdaData) } - // Trigger a batch send by cancelling context - cancel() assertGzipBody(expected.String()) // Wait for ForwardApmData to exit + cancel() wg.Wait() } diff --git a/apmproxy/batch.go b/apmproxy/batch.go index 37860340..ba64340f 100644 --- a/apmproxy/batch.go +++ b/apmproxy/batch.go @@ -36,26 +36,27 @@ var ( var ( maxSizeThreshold = 0.9 + zeroTime = time.Time{} ) // BatchData represents a batch of data without metadata // that will be sent to APMServer. BatchData is not safe // concurrent access. type BatchData struct { - metadataBytes int - buf bytes.Buffer - count int - maxSize int - maxAgeInSeconds int - age int64 + metadataBytes int + buf bytes.Buffer + count int + age time.Time + maxSize int + maxAge time.Duration } // NewBatch creates a new BatchData which can accept a // maximum number of entires as specified by the argument -func NewBatch(maxSize int, maxAgeInSeconds int, metadata []byte) *BatchData { +func NewBatch(maxSize int, maxAge time.Duration, metadata []byte) *BatchData { b := &BatchData{ - maxSize: maxSize, - maxAgeInSeconds: maxAgeInSeconds, + maxSize: maxSize, + maxAge: maxAge, } b.metadataBytes, _ = b.buf.Write(metadata) return b @@ -81,7 +82,7 @@ func (b *BatchData) Add(d APMData) error { } if b.count == 0 { // For first entry, set the age of the batch - b.age = time.Now().Unix() + b.age = time.Now() } b.count++ return nil @@ -98,13 +99,13 @@ func (b *BatchData) Count() int { // 1. max size is greater than threshold (90% of maxSize) // 2. 
batch is older than maturity age func (b *BatchData) ShouldShip() bool { - return b.count >= int(float64(b.maxSize)*maxSizeThreshold) || - (b.age > 0 && time.Now().Unix()-b.age > int64(b.maxAgeInSeconds)) + return (b.count >= int(float64(b.maxSize)*maxSizeThreshold)) || + (!b.age.IsZero() && time.Since(b.age) > b.maxAge) } // Reset resets the batch to prepare for new set of data func (b *BatchData) Reset() { - b.count, b.age = 0, 0 + b.count, b.age = 0, zeroTime b.buf.Truncate(b.metadataBytes) } diff --git a/apmproxy/batch_test.go b/apmproxy/batch_test.go index b8e6d7c3..22a5de08 100644 --- a/apmproxy/batch_test.go +++ b/apmproxy/batch_test.go @@ -29,12 +29,12 @@ const metadata = `{"metadata":{"service":{"agent":{"name":"apm-lambda-extension" func TestAdd(t *testing.T) { t.Run("empty", func(t *testing.T) { - b := NewBatch(1, 100, []byte(metadata)) + b := NewBatch(1, time.Hour, []byte(metadata)) assert.NoError(t, b.Add(APMData{Type: Lambda})) }) t.Run("full", func(t *testing.T) { - b := NewBatch(1, 100, []byte(metadata)) + b := NewBatch(1, time.Hour, []byte(metadata)) require.NoError(t, b.Add(APMData{Type: Lambda})) assert.ErrorIs(t, ErrBatchFull, b.Add(APMData{Type: Lambda})) @@ -42,17 +42,17 @@ func TestAdd(t *testing.T) { } func TestReset(t *testing.T) { - b := NewBatch(1, 100, []byte(metadata)) + b := NewBatch(1, time.Hour, []byte(metadata)) require.NoError(t, b.Add(APMData{Type: Lambda})) require.Equal(t, 1, b.Count()) b.Reset() assert.Equal(t, 0, b.Count()) - assert.Equal(t, int64(0), b.age) + assert.True(t, b.age.IsZero()) } func TestShouldShip_ReasonSize(t *testing.T) { - b := NewBatch(10, 100, []byte(metadata)) + b := NewBatch(10, time.Hour, []byte(metadata)) // Should flush at 90% full for i := 0; i < 9; i++ { @@ -65,12 +65,12 @@ func TestShouldShip_ReasonSize(t *testing.T) { } func TestShouldShip_ReasonAge(t *testing.T) { - b := NewBatch(10, 2, []byte(metadata)) + b := NewBatch(10, time.Second, []byte(metadata)) assert.False(t, b.ShouldShip()) require.NoError(t, b.Add(APMData{Type: Lambda})) - <-time.After(3 * time.Second) + <-time.After(2 * time.Second) // Should be ready for send after 3 seconds require.Equal(t, 1, b.Count()) diff --git a/apmproxy/client.go b/apmproxy/client.go index b5a44fcc..ab5a93a5 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -47,7 +47,7 @@ const ( defaultReceiverAddr = ":8200" defaultAgentBufferSize int = 100 defaultMaxBatchSize int = 50 - defaultMaxBatchAge int = 10 + defaultMaxBatchAge time.Duration = 10 * time.Second ) // Client is the client used to communicate with the apm server. @@ -70,7 +70,7 @@ type Client struct { batch *BatchData maxBatchSize int - maxBatchAge int + maxBatchAge time.Duration metadataAvailable chan<- struct{} } diff --git a/apmproxy/option.go b/apmproxy/option.go index 8ec4ba75..48cde77a 100644 --- a/apmproxy/option.go +++ b/apmproxy/option.go @@ -78,6 +78,8 @@ func WithAgentDataBufferSize(size int) Option { } } +// WithLogger configures a custom zap logger to be used by +// the client. func WithLogger(logger *zap.SugaredLogger) Option { return func(c *Client) { c.logger = logger @@ -91,3 +93,25 @@ func WithMetadataAvailableIndicator(ch chan<- struct{}) Option { c.metadataAvailable = ch } } + +// WithMaxBatchSize configures the maximum batch size for +// the payload sent to the APMServer +func WithMaxBatchSize(size int) Option { + return func(c *Client) { + c.maxBatchSize = size + } +} + +// WithMaxBatchAge configures the maximum age of the batch +// before it is sent to APMServer. 
Age is measured from the +// time the first entry is added in the batch. +// +// It is possible for batch age to be greater than the +// configured max batch age when sending since a send is +// triggered by a new log event and log events can be delayed +// due to various factors. +func WithMaxBatchAge(age time.Duration) Option { + return func(c *Client) { + c.maxBatchAge = age + } +} diff --git a/logsapi/event.go b/logsapi/event.go index c961982e..fa6f62c8 100644 --- a/logsapi/event.go +++ b/logsapi/event.go @@ -100,7 +100,7 @@ func (lc *Client) ProcessLogs( lc.logger.Debug("Log API runtimeDone event request id didn't match") } case PlatformLogsDropped: - lc.logger.Warn("Logs dropped due to extension falling behind: %v", logEvent.Record) + lc.logger.Warnf("Logs dropped due to extension falling behind: %v", logEvent.Record) case FunctionLog: lc.logger.Debug("Received function log") processedLog, err := ProcessFunctionLog( From 889885446f0f40e7570379177e7e120e2dab7498 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Tue, 18 Oct 2022 11:17:57 +0800 Subject: [PATCH 08/15] Refactor logsapi to be independent of metadata status --- apmproxy/apmserver.go | 72 ++++++++++++++++++++++++++++---------- apmproxy/apmserver_test.go | 10 +++--- apmproxy/client.go | 9 +++-- apmproxy/option.go | 4 +-- apmproxy/receiver.go | 6 +++- apmproxy/receiver_test.go | 2 +- app/app.go | 3 +- logsapi/client.go | 3 +- logsapi/client_test.go | 33 ++++------------- logsapi/event.go | 4 +-- logsapi/option.go | 8 ----- logsapi/route_handlers.go | 24 +++++-------- 12 files changed, 92 insertions(+), 86 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index d32ba97f..66656338 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -47,15 +47,33 @@ func (c *Client) ForwardApmData(ctx context.Context) error { if c.IsUnhealthy() { return nil } + + // Wait for metadata to be available, metadata will be available as soon as + // the first agent data is processed. 
+ select { + case <-ctx.Done(): + c.logger.Debug("Invocation context cancelled, not processing any more agent data") + return nil + case data := <-c.AgentDataChannel: + if err := c.forwardAPMDataByType(ctx, data); err != nil { + return err + } + case <-c.metadataAvailable: + } + + // Process lambda and agent data after metadata is available for { select { case <-ctx.Done(): c.logger.Debug("Invocation context cancelled, not processing any more agent data") - // Any remaining data in the batch will be handled by FlushAPMData return nil - case apmData := <-c.DataChannel: - if err := c.forwardAPMDataByType(ctx, apmData); err != nil { - return fmt.Errorf("error sending to APM server, skipping: %v", err) + case data := <-c.AgentDataChannel: + if err := c.forwardAPMDataByType(ctx, data); err != nil { + return err + } + case data := <-c.LambdaDataChannel: + if err := c.forwardAPMDataByType(ctx, data); err != nil { + return err } } } @@ -68,19 +86,48 @@ func (c *Client) FlushAPMData(ctx context.Context) { return } c.logger.Debug("Flush started - Checking for agent data") + + // Flush agent data first to make sure metadata is available if possible + func() { + for { + select { + case <-ctx.Done(): + c.logger.Debugf("Failed to flush completely, may result in data drop") + case data := <-c.AgentDataChannel: + if err := c.forwardAPMDataByType(ctx, data); err != nil { + c.logger.Errorf("Error sending to APM Server, skipping: %v", err) + } + default: + c.logger.Debug("Flush ended for agent data - no data in buffer") + return + } + } + }() + + // If metadata still not available then fail fast + select { + case <-c.metadataAvailable: + default: + c.logger.Warnf("Metadata not available at flush, skipping sending lambda data to APM Server") + return + } + + // Flush lambda data for { select { - case apmData := <-c.DataChannel: - c.logger.Debug("Flush in progress - Processing agent data") + case <-ctx.Done(): + c.logger.Debugf("Failed to flush completely, may result in data drop") + case apmData := <-c.LambdaDataChannel: + c.logger.Debug("Flush in progress - Processing lambda data") if err := c.forwardAPMDataByType(ctx, apmData); err != nil { c.logger.Errorf("Error sending to APM server, skipping: %v", err) } default: - c.logger.Debug("Flush ended - No agent data on buffer") // Flush any remaining data in batch if err := c.sendBatch(ctx); err != nil { c.logger.Errorf("Error sending to APM server, skipping: %v", err) } + c.logger.Debug("Flush ended for lambda data - no data in buffer") return } } @@ -279,17 +326,6 @@ func (c *Client) ComputeGracePeriod() time.Duration { return time.Duration((gracePeriodWithoutJitter + jitter*gracePeriodWithoutJitter) * float64(time.Second)) } -// EnqueueAPMData adds a apm data struct to the agent data channel, effectively queueing for a send -// to the APM server. -func (c *Client) EnqueueAPMData(apmData APMData) { - select { - case c.DataChannel <- apmData: - c.logger.Debug("Adding APM data to buffer to be sent to apm server") - default: - c.logger.Warn("Channel full: dropping a subset of agent data") - } -} - // ShouldFlush returns true if the client should flush APM data after processing the event. 
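// A note on FlushAPMData above: each loop drains its channel without blocking
// by pairing the receive with a default case, so a flush never waits for new
// data to arrive and simply stops once the buffer is empty. The underlying
// pattern, as a minimal sketch (drain and handle are illustrative names):
//
//	func drain(ch <-chan APMData, handle func(APMData)) {
//		for {
//			select {
//			case d := <-ch:
//				handle(d)
//			default:
//				return // nothing buffered, stop draining
//			}
//		}
//	}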
func (c *Client) ShouldFlush() bool { return c.sendStrategy == SyncFlush diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index f040afa3..ea149317 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -630,7 +630,7 @@ func TestForwardApmData(t *testing.T) { agentData := fmt.Sprintf("%s\n%s", metadata, `{"log": {"message": "test"}}`) lambdaData := `{"log": {"message": "test"}}` metaAvailable := make(chan struct{}) - maxBatchAge := 500 * time.Millisecond + maxBatchAge := 1 * time.Second apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), @@ -651,7 +651,7 @@ func TestForwardApmData(t *testing.T) { }() // Populate metadata by sending agent data - apmClient.DataChannel <- apmproxy.APMData{ + apmClient.AgentDataChannel <- apmproxy.APMData{ Data: []byte(agentData), Type: apmproxy.Agent, } @@ -674,7 +674,7 @@ func TestForwardApmData(t *testing.T) { // Wait for batch age to make sure the batch is mature to be sent <-time.After(maxBatchAge + time.Millisecond) } - apmClient.DataChannel <- apmproxy.APMData{ + apmClient.LambdaDataChannel <- apmproxy.APMData{ Data: []byte(lambdaData), Type: apmproxy.Lambda, } @@ -724,9 +724,9 @@ func BenchmarkFlushAPMData(b *testing.B) { b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - apmClient.DataChannel <- agentAPMData + apmClient.AgentDataChannel <- agentAPMData for j := 0; j < 99; j++ { - apmClient.DataChannel <- apmproxy.APMData{ + apmClient.LambdaDataChannel <- apmproxy.APMData{ Data: []byte("this is test log"), Type: apmproxy.Lambda, } diff --git a/apmproxy/client.go b/apmproxy/client.go index ab5a93a5..4699441c 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -46,6 +46,7 @@ const ( defaultDataForwarderTimeout time.Duration = 3 * time.Second defaultReceiverAddr = ":8200" defaultAgentBufferSize int = 100 + defaultLambdaBufferSize int = 100 defaultMaxBatchSize int = 50 defaultMaxBatchAge time.Duration = 10 * time.Second ) @@ -54,7 +55,8 @@ const ( type Client struct { mu sync.RWMutex bufferPool sync.Pool - DataChannel chan APMData + AgentDataChannel chan APMData + LambdaDataChannel chan APMData client *http.Client Status Status ReconnectionCount int @@ -72,7 +74,7 @@ type Client struct { maxBatchSize int maxBatchAge time.Duration - metadataAvailable chan<- struct{} + metadataAvailable chan struct{} } func NewClient(opts ...Option) (*Client, error) { @@ -80,7 +82,8 @@ func NewClient(opts ...Option) (*Client, error) { bufferPool: sync.Pool{New: func() interface{} { return &bytes.Buffer{} }}, - DataChannel: make(chan APMData, defaultAgentBufferSize), + AgentDataChannel: make(chan APMData, defaultAgentBufferSize), + LambdaDataChannel: make(chan APMData, defaultLambdaBufferSize), client: &http.Client{ Transport: http.DefaultTransport.(*http.Transport).Clone(), }, diff --git a/apmproxy/option.go b/apmproxy/option.go index 48cde77a..34622fb1 100644 --- a/apmproxy/option.go +++ b/apmproxy/option.go @@ -74,7 +74,7 @@ func WithSendStrategy(strategy SendStrategy) Option { // WithAgentDataBufferSize sets the agent data buffer size. 
func WithAgentDataBufferSize(size int) Option { return func(c *Client) { - c.DataChannel = make(chan APMData, size) + c.AgentDataChannel = make(chan APMData, size) } } @@ -88,7 +88,7 @@ func WithLogger(logger *zap.SugaredLogger) Option { // WithMetadataAvailableIndicator configures a channel // which will broadcast metadata available event on close -func WithMetadataAvailableIndicator(ch chan<- struct{}) Option { +func WithMetadataAvailableIndicator(ch chan struct{}) Option { return func(c *Client) { c.metadataAvailable = ch } diff --git a/apmproxy/receiver.go b/apmproxy/receiver.go index 74843ad8..3361b8a4 100644 --- a/apmproxy/receiver.go +++ b/apmproxy/receiver.go @@ -144,7 +144,11 @@ func (c *Client) handleIntakeV2Events() func(w http.ResponseWriter, r *http.Requ } if len(agentData.Data) != 0 { - c.EnqueueAPMData(agentData) + select { + case c.AgentDataChannel <- agentData: + default: + c.logger.Warnf("Channel full: dropping a subset of agent data") + } } if agentFlushed { diff --git a/apmproxy/receiver_test.go b/apmproxy/receiver_test.go index 2179a1cc..e908eed6 100644 --- a/apmproxy/receiver_test.go +++ b/apmproxy/receiver_test.go @@ -249,7 +249,7 @@ func Test_handleIntakeV2EventsNoQueryParam(t *testing.T) { resp, err := client.Do(req) require.NoError(t, err) select { - case <-apmClient.DataChannel: + case <-apmClient.AgentDataChannel: case <-time.After(1 * time.Second): t.Log("Timed out waiting for server to send FuncDone signal") t.Fail() diff --git a/app/app.go b/app/app.go index 9d39bffb..3d0416d7 100644 --- a/app/app.go +++ b/app/app.go @@ -88,10 +88,9 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { lc, err := logsapi.NewClient( logsapi.WithLogsAPIBaseURL(fmt.Sprintf("http://%s", c.awsLambdaRuntimeAPI)), logsapi.WithListenerAddress(addr), - logsapi.WithLogBuffer(100), + logsapi.WithLogBuffer(0), logsapi.WithLogger(app.logger), logsapi.WithSubscriptionTypes(subscriptionLogStreams...), - logsapi.WithMetadataAvailableIndicator(c.metadataAvailable), ) if err != nil { return nil, err diff --git a/logsapi/client.go b/logsapi/client.go index b0d7bcf6..b7d18ac0 100644 --- a/logsapi/client.go +++ b/logsapi/client.go @@ -52,7 +52,6 @@ type Client struct { logsAPIBaseURL string logsAPISubscriptionTypes []SubscriptionType logsChannel chan LogEvent - metadataAvailable <-chan struct{} listenerAddr string server *http.Server logger *zap.SugaredLogger @@ -70,7 +69,7 @@ func NewClient(opts ...ClientOption) (*Client, error) { } mux := http.NewServeMux() - mux.HandleFunc("/", handleLogEventsRequest(c.logger, c.logsChannel, c.metadataAvailable)) + mux.HandleFunc("/", handleLogEventsRequest(c.logger, c.logsChannel)) c.server.Handler = mux diff --git a/logsapi/client_test.go b/logsapi/client_test.go index 175f16f4..b29d8cb8 100644 --- a/logsapi/client_test.go +++ b/logsapi/client_test.go @@ -123,32 +123,13 @@ func TestSubscribeAWSRequest(t *testing.T) { addr := "localhost:8080" testCases := map[string]struct { - getOpts func() []logsapi.ClientOption - expectedRespCode int + opts []logsapi.ClientOption }{ - "before_metadata_available": { - getOpts: func() []logsapi.ClientOption { - metaAvailable := make(chan struct{}) - return []logsapi.ClientOption{ - logsapi.WithListenerAddress(addr), - logsapi.WithLogger(zaptest.NewLogger(t).Sugar()), - logsapi.WithMetadataAvailableIndicator(metaAvailable), - } - }, - expectedRespCode: http.StatusInternalServerError, - }, - "after_metadata_available": { - getOpts: func() []logsapi.ClientOption { - metaAvailable := make(chan struct{}) - // 
Close the channel to signal metadata is available - defer close(metaAvailable) - return []logsapi.ClientOption{ - logsapi.WithListenerAddress(addr), - logsapi.WithLogger(zaptest.NewLogger(t).Sugar()), - logsapi.WithMetadataAvailableIndicator(metaAvailable), - } + "valid response": { + opts: []logsapi.ClientOption{ + logsapi.WithListenerAddress(addr), + logsapi.WithLogger(zaptest.NewLogger(t).Sugar()), }, - expectedRespCode: http.StatusOK, }, } for name, tc := range testCases { @@ -163,7 +144,7 @@ func TestSubscribeAWSRequest(t *testing.T) { defer s.Close() cOpts := append( - tc.getOpts(), + tc.opts, logsapi.WithLogsAPIBaseURL(s.URL), logsapi.WithLogBuffer(1), logsapi.WithSubscriptionTypes(logsapi.Platform, logsapi.Function), @@ -188,7 +169,7 @@ func TestSubscribeAWSRequest(t *testing.T) { // Send the request to the logs listener rsp, err := http.DefaultClient.Do(req) require.NoError(t, err) - require.Equal(t, tc.expectedRespCode, rsp.StatusCode) + require.Equal(t, http.StatusOK, rsp.StatusCode) require.NoError(t, rsp.Body.Close()) require.NoError(t, c.Shutdown()) }) diff --git a/logsapi/event.go b/logsapi/event.go index fa6f62c8..9c891512 100644 --- a/logsapi/event.go +++ b/logsapi/event.go @@ -93,7 +93,7 @@ func (lc *Client) ProcessLogs( if err != nil { lc.logger.Errorf("Error processing Lambda platform metrics: %v", err) } else { - apmClient.EnqueueAPMData(processedMetrics) + apmClient.LambdaDataChannel <- processedMetrics } } else { lc.logger.Warn("report event request id didn't match the previous event id") @@ -111,7 +111,7 @@ func (lc *Client) ProcessLogs( if err != nil { lc.logger.Errorf("Error processing function log : %v", err) } else { - apmClient.EnqueueAPMData(processedLog) + apmClient.LambdaDataChannel <- processedLog } } case <-ctx.Done(): diff --git a/logsapi/option.go b/logsapi/option.go index 677cbf4d..e431477e 100644 --- a/logsapi/option.go +++ b/logsapi/option.go @@ -55,11 +55,3 @@ func WithSubscriptionTypes(types ...SubscriptionType) ClientOption { c.logsAPISubscriptionTypes = types } } - -// WithMetadataAvailableIndicator configures a channel -// which will broadcast metadata available event on close -func WithMetadataAvailableIndicator(ch <-chan struct{}) ClientOption { - return func(c *Client) { - c.metadataAvailable = ch - } -} diff --git a/logsapi/route_handlers.go b/logsapi/route_handlers.go index 7c10a956..6dbca1b1 100644 --- a/logsapi/route_handlers.go +++ b/logsapi/route_handlers.go @@ -25,22 +25,8 @@ import ( "go.uber.org/zap" ) -func handleLogEventsRequest( - logger *zap.SugaredLogger, - logsChannel chan LogEvent, - metadataAvailable <-chan struct{}, -) func(w http.ResponseWriter, r *http.Request) { +func handleLogEventsRequest(logger *zap.SugaredLogger, logsChannel chan LogEvent) func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - select { - case <-metadataAvailable: - case <-time.After(100 * time.Millisecond): - // Without metadata the events cannot be processed - // thus we indicate lambda to keep the buffer - logger.Warnf("Metadata unavailable, signaling lambda to buffer log events") - w.WriteHeader(http.StatusInternalServerError) - return - } - var logEvents []LogEvent if err := json.NewDecoder(r.Body).Decode(&logEvents); err != nil { logger.Errorf("Error unmarshalling log events: %+v", err) @@ -54,7 +40,13 @@ func handleLogEventsRequest( w.WriteHeader(http.StatusInternalServerError) continue } - logsChannel <- logEvents[idx] + select { + case logsChannel <- logEvents[idx]: + case <-r.Context().Done(): + 
logger.Warnf("Failed to enqueue event, signaling lambda to retry") + w.WriteHeader(http.StatusInternalServerError) + return + } } } } From 2d7eeb76523e654b3b3199bd14cf67a496e443c1 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Tue, 18 Oct 2022 11:20:56 +0800 Subject: [PATCH 09/15] Address review comments --- apmproxy/apmserver.go | 10 +++++----- apmproxy/batch.go | 2 +- apmproxy/client.go | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 66656338..275f10df 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -138,7 +138,7 @@ func (c *Client) FlushAPMData(ctx context.Context) { // The function compresses the APM agent data, if it's not already compressed. // It sets the APM transport status to failing upon errors, as part of the backoff // strategy. -func (c *Client) PostToApmServer(ctx context.Context, agentData APMData) error { +func (c *Client) PostToApmServer(ctx context.Context, apmData APMData) error { // todo: can this be a streaming or streaming style call that keeps the // connection open across invocations? if c.IsUnhealthy() { @@ -146,11 +146,11 @@ func (c *Client) PostToApmServer(ctx context.Context, agentData APMData) error { } endpointURI := "intake/v2/events" - encoding := agentData.ContentEncoding + encoding := apmData.ContentEncoding var r io.Reader - if agentData.ContentEncoding != "" { - r = bytes.NewReader(agentData.Data) + if apmData.ContentEncoding != "" { + r = bytes.NewReader(apmData.Data) } else { encoding = "gzip" buf := c.bufferPool.Get().(*bytes.Buffer) @@ -162,7 +162,7 @@ func (c *Client) PostToApmServer(ctx context.Context, agentData APMData) error { if err != nil { return err } - if _, err := gw.Write(agentData.Data); err != nil { + if _, err := gw.Write(apmData.Data); err != nil { return fmt.Errorf("failed to compress data: %w", err) } if err := gw.Close(); err != nil { diff --git a/apmproxy/batch.go b/apmproxy/batch.go index ba64340f..d39ba3b2 100644 --- a/apmproxy/batch.go +++ b/apmproxy/batch.go @@ -52,7 +52,7 @@ type BatchData struct { } // NewBatch creates a new BatchData which can accept a -// maximum number of entires as specified by the argument +// maximum number of entries as specified by the argument func NewBatch(maxSize int, maxAge time.Duration, metadata []byte) *BatchData { b := &BatchData{ maxSize: maxSize, diff --git a/apmproxy/client.go b/apmproxy/client.go index 4699441c..6f9d62e5 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -48,7 +48,7 @@ const ( defaultAgentBufferSize int = 100 defaultLambdaBufferSize int = 100 defaultMaxBatchSize int = 50 - defaultMaxBatchAge time.Duration = 10 * time.Second + defaultMaxBatchAge time.Duration = 2 * time.Second ) // Client is the client used to communicate with the apm server. 
From 62415bb44d834ee16049d0d3ad27be4e85beb74f Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Tue, 18 Oct 2022 18:34:12 +0800 Subject: [PATCH 10/15] Add buffering behavior for main_test --- app/app.go | 2 +- main_test.go | 113 +++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 83 insertions(+), 32 deletions(-) diff --git a/app/app.go b/app/app.go index 3d0416d7..3f928b35 100644 --- a/app/app.go +++ b/app/app.go @@ -88,7 +88,7 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { lc, err := logsapi.NewClient( logsapi.WithLogsAPIBaseURL(fmt.Sprintf("http://%s", c.awsLambdaRuntimeAPI)), logsapi.WithListenerAddress(addr), - logsapi.WithLogBuffer(0), + logsapi.WithLogBuffer(100), logsapi.WithLogger(app.logger), logsapi.WithSubscriptionTypes(subscriptionLogStreams...), ) diff --git a/main_test.go b/main_test.go index 385441d6..cf5ee376 100644 --- a/main_test.go +++ b/main_test.go @@ -156,6 +156,21 @@ func newMockApmServer(t *testing.T, l *zap.SugaredLogger) (*MockServerInternals, func newMockLambdaServer(t *testing.T, logsapiAddr string, eventsChannel chan MockEvent, l *zap.SugaredLogger) *MockServerInternals { var lambdaServerInternals MockServerInternals + // A big queue that can hold all the events required for a test + mockLogEventQ := make(chan logsapi.LogEvent, 100) + ctx, cancel := context.WithCancel(context.Background()) + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + startLogSender(ctx, mockLogEventQ, logsapiAddr, l) + }() + t.Cleanup(func() { + cancel() + wg.Wait() + }) + lambdaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.RequestURI { // Extension registration request @@ -171,19 +186,19 @@ func newMockLambdaServer(t *testing.T, logsapiAddr string, eventsChannel chan Mo } case "/2020-01-01/extension/event/next": lambdaServerInternals.WaitGroup.Wait() - currId := uuid.New().String() + currID := uuid.New().String() select { case nextEvent := <-eventsChannel: - sendNextEventInfo(w, currId, nextEvent, l) - go processMockEvent(currId, nextEvent, os.Getenv("ELASTIC_APM_DATA_RECEIVER_SERVER_PORT"), logsapiAddr, &lambdaServerInternals, l) + sendNextEventInfo(w, currID, nextEvent.Timeout, nextEvent.Type == Shutdown, l) + go processMockEvent(mockLogEventQ, currID, nextEvent, os.Getenv("ELASTIC_APM_DATA_RECEIVER_SERVER_PORT"), &lambdaServerInternals, l) default: finalShutDown := MockEvent{ Type: Shutdown, ExecutionDuration: 0, Timeout: 0, } - sendNextEventInfo(w, currId, finalShutDown, l) - go processMockEvent(currId, finalShutDown, os.Getenv("ELASTIC_APM_DATA_RECEIVER_SERVER_PORT"), logsapiAddr, &lambdaServerInternals, l) + sendNextEventInfo(w, currID, finalShutDown.Timeout, true, l) + go processMockEvent(mockLogEventQ, currID, finalShutDown, os.Getenv("ELASTIC_APM_DATA_RECEIVER_SERVER_PORT"), &lambdaServerInternals, l) } // Logs API subscription request case "/2020-08-15/logs": @@ -209,14 +224,12 @@ func newMockLambdaServer(t *testing.T, logsapiAddr string, eventsChannel chan Mo func newTestStructs(t *testing.T) chan MockEvent { http.DefaultServeMux = new(http.ServeMux) - _, cancel := context.WithCancel(context.Background()) - t.Cleanup(func() { cancel() }) eventsChannel := make(chan MockEvent, 100) return eventsChannel } -func processMockEvent(currId string, event MockEvent, extensionPort string, logsapiAddr string, internals *MockServerInternals, l *zap.SugaredLogger) { - sendLogEvent(logsapiAddr, currId, logsapi.PlatformStart, l) +func processMockEvent(q chan<- logsapi.LogEvent, 
currID string, event MockEvent, extensionPort string, internals *MockServerInternals, l *zap.SugaredLogger) { + queueLogEvent(q, currID, logsapi.PlatformStart, l) client := http.Client{} // Use a custom transport with a low timeout @@ -311,22 +324,23 @@ func processMockEvent(currId string, event MockEvent, extensionPort string, logs case Shutdown: } if sendRuntimeDone { - sendLogEvent(logsapiAddr, currId, logsapi.PlatformRuntimeDone, l) + l.Info("Queueuing runtime done event") + queueLogEvent(q, currID, logsapi.PlatformRuntimeDone, l) } if sendMetrics { - sendLogEvent(logsapiAddr, currId, logsapi.PlatformReport, l) + queueLogEvent(q, currID, logsapi.PlatformReport, l) } } -func sendNextEventInfo(w http.ResponseWriter, id string, event MockEvent, l *zap.SugaredLogger) { +func sendNextEventInfo(w http.ResponseWriter, id string, timeoutSec float64, shutdown bool, l *zap.SugaredLogger) { nextEventInfo := extension.NextEventResponse{ EventType: "INVOKE", - DeadlineMs: time.Now().UnixNano()/int64(time.Millisecond) + int64(event.Timeout*1000), + DeadlineMs: time.Now().UnixNano()/int64(time.Millisecond) + int64(timeoutSec*1000), RequestID: id, InvokedFunctionArn: "arn:aws:lambda:eu-central-1:627286350134:function:main_unit_test", Tracing: extension.Tracing{}, } - if event.Type == Shutdown { + if shutdown { nextEventInfo.EventType = "SHUTDOWN" } @@ -335,9 +349,9 @@ func sendNextEventInfo(w http.ResponseWriter, id string, event MockEvent, l *zap } } -func sendLogEvent(logsapiAddr string, requestId string, logEventType logsapi.LogEventType, l *zap.SugaredLogger) { +func queueLogEvent(q chan<- logsapi.LogEvent, requestID string, logEventType logsapi.LogEventType, l *zap.SugaredLogger) { record := logsapi.LogEventRecord{ - RequestID: requestId, + RequestID: requestID, } if logEventType == logsapi.PlatformReport { record.Metrics = logsapi.PlatformMetrics{ @@ -359,27 +373,64 @@ func sendLogEvent(logsapiAddr string, requestId string, logEventType logsapi.Log bufRecord := new(bytes.Buffer) if err := json.NewEncoder(bufRecord).Encode(record); err != nil { l.Errorf("Could not encode record : %v", err) - return } logEvent.StringRecord = bufRecord.String() + q <- logEvent +} - // Convert full log event to JSON - bufLogEvent := new(bytes.Buffer) - if err := json.NewEncoder(bufLogEvent).Encode([]logsapi.LogEvent{logEvent}); err != nil { - l.Errorf("Could not encode record : %v", err) - return +func startLogSender(ctx context.Context, q <-chan logsapi.LogEvent, logsapiAddr string, l *zap.SugaredLogger) { + client := http.Client{ + Timeout: 10 * time.Millisecond, } + doSend := func(events []logsapi.LogEvent) error { + var buf bytes.Buffer + if err := json.NewEncoder(&buf).Encode(events); err != nil { + return err + } - req, err := http.NewRequest(http.MethodPost, "http://"+logsapiAddr, bufLogEvent) - if err != nil { - l.Errorf("Could not create logs api request: %v", err) - return - } + req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://%s", logsapiAddr), &buf) + if err != nil { + return err + } - client := http.Client{} - if _, err := client.Do(req); err != nil { - l.Errorf("Could not send log event : %v", err) - return + resp, err := client.Do(req) + if err != nil { + return err + } + if resp.StatusCode/100 != 2 { + return fmt.Errorf("received a non 2xx status code: %d", resp.StatusCode) + } + return nil + } + + var batch []logsapi.LogEvent + flushInterval := time.NewTicker(100 * time.Millisecond) + defer flushInterval.Stop() + for { + select { + case <-flushInterval.C: + var trySend bool + for 
!trySend { + // TODO: @lahsivjar mock dropping of logs, batch age and batch size + // TODO: @lahsivjar is it possible for one batch to have platform.runtimeDone + // event in middle of the batch? + select { + case e := <-q: + batch = append(batch, e) + default: + trySend = true + if len(batch) > 0 { + if err := doSend(batch); err != nil { + l.Warnf("mock lambda API failed to send logs to the extension: %v", err) + } else { + batch = batch[:0] + } + } + } + } + case <-ctx.Done(): + return + } } } From 7b8afdcc54ce93f316f82c200a934b2160231b87 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Tue, 18 Oct 2022 18:56:54 +0800 Subject: [PATCH 11/15] Remove Type field from APMData --- apmproxy/apmserver.go | 61 +++++++++++++++++------------------- apmproxy/apmserver_test.go | 5 +-- apmproxy/batch.go | 6 ---- apmproxy/batch_test.go | 12 +++---- apmproxy/receiver.go | 12 ------- logsapi/functionlogs.go | 1 - logsapi/functionlogs_test.go | 2 -- logsapi/metrics.go | 1 - logsapi/metrics_test.go | 2 -- 9 files changed, 36 insertions(+), 66 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 275f10df..0be3e8ad 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -55,7 +55,7 @@ func (c *Client) ForwardApmData(ctx context.Context) error { c.logger.Debug("Invocation context cancelled, not processing any more agent data") return nil case data := <-c.AgentDataChannel: - if err := c.forwardAPMDataByType(ctx, data); err != nil { + if err := c.forwardAgentData(ctx, data); err != nil { return err } case <-c.metadataAvailable: @@ -68,11 +68,11 @@ func (c *Client) ForwardApmData(ctx context.Context) error { c.logger.Debug("Invocation context cancelled, not processing any more agent data") return nil case data := <-c.AgentDataChannel: - if err := c.forwardAPMDataByType(ctx, data); err != nil { + if err := c.forwardAgentData(ctx, data); err != nil { return err } case data := <-c.LambdaDataChannel: - if err := c.forwardAPMDataByType(ctx, data); err != nil { + if err := c.forwardLambdaData(ctx, data); err != nil { return err } } @@ -94,7 +94,7 @@ func (c *Client) FlushAPMData(ctx context.Context) { case <-ctx.Done(): c.logger.Debugf("Failed to flush completely, may result in data drop") case data := <-c.AgentDataChannel: - if err := c.forwardAPMDataByType(ctx, data); err != nil { + if err := c.forwardAgentData(ctx, data); err != nil { c.logger.Errorf("Error sending to APM Server, skipping: %v", err) } default: @@ -119,7 +119,7 @@ func (c *Client) FlushAPMData(ctx context.Context) { c.logger.Debugf("Failed to flush completely, may result in data drop") case apmData := <-c.LambdaDataChannel: c.logger.Debug("Flush in progress - Processing lambda data") - if err := c.forwardAPMDataByType(ctx, apmData); err != nil { + if err := c.forwardLambdaData(ctx, apmData); err != nil { c.logger.Errorf("Error sending to APM server, skipping: %v", err) } default: @@ -348,35 +348,32 @@ func (c *Client) WaitForFlush() <-chan struct{} { return c.flushCh } -func (c *Client) forwardAPMDataByType(ctx context.Context, apmData APMData) error { - switch apmData.Type { - case Agent: - if c.batch == nil { - metadata, err := ProcessMetadata(apmData) - if err != nil { - return fmt.Errorf("failed to extract metadata from agent payload %w", err) - } - c.batch = NewBatch(c.maxBatchSize, c.maxBatchAge, metadata) - // broadcast that metadata is available - close(c.metadataAvailable) - } - return c.PostToApmServer(ctx, apmData) - case Lambda: - if c.batch == nil { - // This state is not possible since we are pushing 
back on - // lambda logs API until metadata is available. - return errors.New("unexpected state, metadata not yet set") - } - if err := c.batch.Add(apmData); err != nil { - c.logger.Warnf("Dropping data due to error: %v", err) - } - if c.batch.ShouldShip() { - return c.sendBatch(ctx) +func (c *Client) forwardAgentData(ctx context.Context, apmData APMData) error { + if c.batch == nil { + metadata, err := ProcessMetadata(apmData) + if err != nil { + return fmt.Errorf("failed to extract metadata from agent payload %w", err) } - return nil - default: - return errors.New("invalid apm data type") + c.batch = NewBatch(c.maxBatchSize, c.maxBatchAge, metadata) + // broadcast that metadata is available + close(c.metadataAvailable) } + return c.PostToApmServer(ctx, apmData) +} + +func (c *Client) forwardLambdaData(ctx context.Context, apmData APMData) error { + if c.batch == nil { + // This state is not possible since we are pushing back on + // lambda logs API until metadata is available. + return errors.New("unexpected state, metadata not yet set") + } + if err := c.batch.Add(apmData); err != nil { + c.logger.Warnf("Dropping data due to error: %v", err) + } + if c.batch.ShouldShip() { + return c.sendBatch(ctx) + } + return nil } func (c *Client) sendBatch(ctx context.Context) error { diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index ea149317..cfa21356 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -653,7 +653,6 @@ func TestForwardApmData(t *testing.T) { // Populate metadata by sending agent data apmClient.AgentDataChannel <- apmproxy.APMData{ Data: []byte(agentData), - Type: apmproxy.Agent, } assertGzipBody(agentData) // After reading agent data with metadata the metadata indicator @@ -676,7 +675,6 @@ func TestForwardApmData(t *testing.T) { } apmClient.LambdaDataChannel <- apmproxy.APMData{ Data: []byte(lambdaData), - Type: apmproxy.Lambda, } expected.WriteByte('\n') expected.WriteString(lambdaData) @@ -719,7 +717,7 @@ func BenchmarkFlushAPMData(b *testing.B) { {"transaction": { "id": "00xxxxFFaaaa1234", "trace_id": "0123456789abcdef0123456789abcdef", "name": "amqp receive", "parent_id": "abcdefabcdef01234567", "type": "messaging", "duration": 3, "span_count": { "started": 1 }, "context": {"message": {"queue": { "name": "new_users"}, "age":{ "ms": 1577958057123}, "headers": {"user_id": "1ax3", "involved_services": ["user", "auth"]}, "body": "user created", "routing_key": "user-created-transaction"}},"session":{"id":"sunday","sequence":123}}} {"transaction": { "name": "july-2021-delete-after-july-31", "type": "lambda", "result": "success", "id": "142e61450efb8574", "trace_id": "eb56529a1f461c5e7e2f66ecb075e983", "subtype": null, "action": null, "duration": 38.853, "timestamp": 1631736666365048, "sampled": true, "context": { "cloud": { "origin": { "account": { "id": "abc123" }, "provider": "aws", "region": "us-east-1", "service": { "name": "serviceName" } } }, "service": { "origin": { "id": "abc123", "name": "service-name", "version": "1.0" } }, "user": {}, "tags": {}, "custom": { } }, "sync": true, "span_count": { "started": 0 }, "outcome": "unknown", "faas": { "coldstart": false, "execution": "2e13b309-23e1-417f-8bf7-074fc96bc683", "trigger": { "request_id": "FuH2Cir_vHcEMUA=", "type": "http" } }, "sample_rate": 1 } } `) - agentAPMData := apmproxy.APMData{Data: agentData, Type: apmproxy.Agent} + agentAPMData := apmproxy.APMData{Data: agentData} b.ReportAllocs() b.ResetTimer() @@ -728,7 +726,6 @@ func BenchmarkFlushAPMData(b *testing.B) { for j 
:= 0; j < 99; j++ { apmClient.LambdaDataChannel <- apmproxy.APMData{ Data: []byte("this is test log"), - Type: apmproxy.Lambda, } } apmClient.FlushAPMData(context.Background()) diff --git a/apmproxy/batch.go b/apmproxy/batch.go index d39ba3b2..d139c259 100644 --- a/apmproxy/batch.go +++ b/apmproxy/batch.go @@ -27,8 +27,6 @@ var ( // ErrBatchFull signfies that the batch has reached full capacity // and cannot accept more entries. ErrBatchFull = errors.New("batch is full") - // ErrInvalidType is returned for any APMData that is not Lambda type - ErrInvalidType = errors.New("only accepts lambda type data") // ErrInvalidEncoding is returned for any APMData that is encoded // with any encoding format ErrInvalidEncoding = errors.New("encoded data not supported") @@ -68,9 +66,6 @@ func (b *BatchData) Add(d APMData) error { if b.count == b.maxSize { return ErrBatchFull } - if d.Type != Lambda { - return ErrInvalidType - } if d.ContentEncoding != "" { return ErrInvalidEncoding } @@ -113,6 +108,5 @@ func (b *BatchData) Reset() { func (b *BatchData) ToAPMData() APMData { return APMData{ Data: b.buf.Bytes(), - Type: Lambda, } } diff --git a/apmproxy/batch_test.go b/apmproxy/batch_test.go index 22a5de08..bf0182ee 100644 --- a/apmproxy/batch_test.go +++ b/apmproxy/batch_test.go @@ -31,19 +31,19 @@ func TestAdd(t *testing.T) { t.Run("empty", func(t *testing.T) { b := NewBatch(1, time.Hour, []byte(metadata)) - assert.NoError(t, b.Add(APMData{Type: Lambda})) + assert.NoError(t, b.Add(APMData{})) }) t.Run("full", func(t *testing.T) { b := NewBatch(1, time.Hour, []byte(metadata)) - require.NoError(t, b.Add(APMData{Type: Lambda})) + require.NoError(t, b.Add(APMData{})) - assert.ErrorIs(t, ErrBatchFull, b.Add(APMData{Type: Lambda})) + assert.ErrorIs(t, ErrBatchFull, b.Add(APMData{})) }) } func TestReset(t *testing.T) { b := NewBatch(1, time.Hour, []byte(metadata)) - require.NoError(t, b.Add(APMData{Type: Lambda})) + require.NoError(t, b.Add(APMData{})) require.Equal(t, 1, b.Count()) b.Reset() @@ -57,7 +57,7 @@ func TestShouldShip_ReasonSize(t *testing.T) { // Should flush at 90% full for i := 0; i < 9; i++ { assert.False(t, b.ShouldShip()) - require.NoError(t, b.Add(APMData{Type: Lambda})) + require.NoError(t, b.Add(APMData{})) } require.Equal(t, 9, b.Count()) @@ -68,7 +68,7 @@ func TestShouldShip_ReasonAge(t *testing.T) { b := NewBatch(10, time.Second, []byte(metadata)) assert.False(t, b.ShouldShip()) - require.NoError(t, b.Add(APMData{Type: Lambda})) + require.NoError(t, b.Add(APMData{})) <-time.After(2 * time.Second) diff --git a/apmproxy/receiver.go b/apmproxy/receiver.go index 3361b8a4..d4c40951 100644 --- a/apmproxy/receiver.go +++ b/apmproxy/receiver.go @@ -29,22 +29,11 @@ import ( "time" ) -// APMDataType represents source of APMData -type APMDataType string - -const ( - // Agent data type represents APMData collected from APM agents - Agent APMDataType = "agent" - // Lambda data type represents APMData collected from logs API - Lambda APMDataType = "lambda" -) - // APMData represents data to be sent to APMServer. `Agent` type // data will have `metadata` as ndjson whereas `lambda` type data // will be without metadata. 
type APMData struct { Data []byte - Type APMDataType ContentEncoding string } @@ -139,7 +128,6 @@ func (c *Client) handleIntakeV2Events() func(w http.ResponseWriter, r *http.Requ agentData := APMData{ Data: rawBytes, - Type: Agent, ContentEncoding: r.Header.Get("Content-Encoding"), } diff --git a/logsapi/functionlogs.go b/logsapi/functionlogs.go index d32219fd..19b67b72 100644 --- a/logsapi/functionlogs.go +++ b/logsapi/functionlogs.go @@ -108,7 +108,6 @@ func ProcessFunctionLog( } return apmproxy.APMData{ - Type: apmproxy.Lambda, Data: jsonWriter.Bytes(), }, nil } diff --git a/logsapi/functionlogs_test.go b/logsapi/functionlogs_test.go index 7952bf04..7ebf6dee 100644 --- a/logsapi/functionlogs_test.go +++ b/logsapi/functionlogs_test.go @@ -22,7 +22,6 @@ import ( "testing" "time" - "github.com/elastic/apm-aws-lambda/apmproxy" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -46,6 +45,5 @@ func TestProcessFunctionLog(t *testing.T) { apmData, err := ProcessFunctionLog(reqID, invokedFnArn, event) require.NoError(t, err) - assert.Equal(t, apmproxy.Lambda, apmData.Type) assert.Equal(t, expectedData, string(apmData.Data)) } diff --git a/logsapi/metrics.go b/logsapi/metrics.go index dc670047..c7ebf92a 100644 --- a/logsapi/metrics.go +++ b/logsapi/metrics.go @@ -101,7 +101,6 @@ func ProcessPlatformReport(functionData *extension.NextEventResponse, platformRe } return apmproxy.APMData{ - Type: apmproxy.Lambda, Data: jsonWriter.Bytes(), }, nil } diff --git a/logsapi/metrics_test.go b/logsapi/metrics_test.go index 837276d7..e1e53941 100644 --- a/logsapi/metrics_test.go +++ b/logsapi/metrics_test.go @@ -70,7 +70,6 @@ func TestProcessPlatformReport_Coldstart(t *testing.T) { apmData, err := ProcessPlatformReport(&event, logEvent) require.NoError(t, err) - assert.Equal(t, apmproxy.Lambda, apmData.Type) requestBytes, err := apmproxy.GetUncompressedBytes(apmData.Data, "") require.NoError(t, err) @@ -121,7 +120,6 @@ func TestProcessPlatformReport_NoColdstart(t *testing.T) { apmData, err := ProcessPlatformReport(&event, logEvent) require.NoError(t, err) - assert.Equal(t, apmproxy.Lambda, apmData.Type) requestBytes, err := apmproxy.GetUncompressedBytes(apmData.Data, "") require.NoError(t, err) From add41785a6792b1b439469d68bbee70643e2a10f Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Tue, 18 Oct 2022 20:48:50 +0800 Subject: [PATCH 12/15] Minor refactor --- apmproxy/apmserver.go | 4 ++-- main_test.go | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 0be3e8ad..9075d55c 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -363,8 +363,8 @@ func (c *Client) forwardAgentData(ctx context.Context, apmData APMData) error { func (c *Client) forwardLambdaData(ctx context.Context, apmData APMData) error { if c.batch == nil { - // This state is not possible since we are pushing back on - // lambda logs API until metadata is available. + // This state is not possible since we start processing lambda + // logs only after metadata is available and batch is created. 
return errors.New("unexpected state, metadata not yet set") } if err := c.batch.Add(apmData); err != nil { diff --git a/main_test.go b/main_test.go index cf5ee376..95ae534f 100644 --- a/main_test.go +++ b/main_test.go @@ -324,7 +324,6 @@ func processMockEvent(q chan<- logsapi.LogEvent, currID string, event MockEvent, case Shutdown: } if sendRuntimeDone { - l.Info("Queueuing runtime done event") queueLogEvent(q, currID, logsapi.PlatformRuntimeDone, l) } if sendMetrics { From ff98be99cdbd4805a1237af5a39c56b4e0135f8f Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 19 Oct 2022 13:05:43 +0800 Subject: [PATCH 13/15] Remove metadata available channel and other minor issues --- apmproxy/apmserver.go | 45 ++++++++++---------------------------- apmproxy/apmserver_test.go | 29 +----------------------- apmproxy/batch_test.go | 4 ++-- apmproxy/client.go | 6 ----- apmproxy/client_test.go | 8 ------- apmproxy/option.go | 8 ------- apmproxy/receiver_test.go | 6 ----- app/app.go | 6 ----- app/config.go | 9 -------- logsapi/event.go | 10 +++++++-- main.go | 1 - main_test.go | 3 --- 12 files changed, 22 insertions(+), 113 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index 9075d55c..f21b9bdb 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -48,20 +48,7 @@ func (c *Client) ForwardApmData(ctx context.Context) error { return nil } - // Wait for metadata to be available, metadata will be available as soon as - // the first agent data is processed. - select { - case <-ctx.Done(): - c.logger.Debug("Invocation context cancelled, not processing any more agent data") - return nil - case data := <-c.AgentDataChannel: - if err := c.forwardAgentData(ctx, data); err != nil { - return err - } - case <-c.metadataAvailable: - } - - // Process lambda and agent data after metadata is available + var lambdaDataChan chan APMData for { select { case <-ctx.Done(): @@ -71,7 +58,10 @@ func (c *Client) ForwardApmData(ctx context.Context) error { if err := c.forwardAgentData(ctx, data); err != nil { return err } - case data := <-c.LambdaDataChannel: + // Wait for metadata to be available, metadata will be available as soon as + // the first agent data is processed. 
+ lambdaDataChan = c.LambdaDataChannel + case data := <-lambdaDataChan: if err := c.forwardLambdaData(ctx, data); err != nil { return err } @@ -88,26 +78,15 @@ func (c *Client) FlushAPMData(ctx context.Context) { c.logger.Debug("Flush started - Checking for agent data") // Flush agent data first to make sure metadata is available if possible - func() { - for { - select { - case <-ctx.Done(): - c.logger.Debugf("Failed to flush completely, may result in data drop") - case data := <-c.AgentDataChannel: - if err := c.forwardAgentData(ctx, data); err != nil { - c.logger.Errorf("Error sending to APM Server, skipping: %v", err) - } - default: - c.logger.Debug("Flush ended for agent data - no data in buffer") - return - } + for i := len(c.AgentDataChannel); i > 0; i-- { + data := <-c.AgentDataChannel + if err := c.forwardAgentData(ctx, data); err != nil { + c.logger.Errorf("Error sending to APM Server, skipping: %v", err) } - }() + } // If metadata still not available then fail fast - select { - case <-c.metadataAvailable: - default: + if c.batch == nil { c.logger.Warnf("Metadata not available at flush, skipping sending lambda data to APM Server") return } @@ -355,8 +334,6 @@ func (c *Client) forwardAgentData(ctx context.Context, apmData APMData) error { return fmt.Errorf("failed to extract metadata from agent payload %w", err) } c.batch = NewBatch(c.maxBatchSize, c.maxBatchAge, metadata) - // broadcast that metadata is available - close(c.metadataAvailable) } return c.PostToApmServer(ctx, apmData) } diff --git a/apmproxy/apmserver_test.go b/apmproxy/apmserver_test.go index cfa21356..5bba30c3 100644 --- a/apmproxy/apmserver_test.go +++ b/apmproxy/apmserver_test.go @@ -78,7 +78,6 @@ func TestPostToApmServerDataCompressed(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.PostToApmServer(context.Background(), agentData)) @@ -125,7 +124,6 @@ func TestPostToApmServerDataNotCompressed(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.PostToApmServer(context.Background(), agentData)) @@ -135,7 +133,6 @@ func TestGracePeriod(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -176,7 +173,6 @@ func TestSetHealthyTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -190,7 +186,6 @@ func TestSetFailingTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.ReconnectionCount = 0 @@ -203,7 +198,6 @@ func TestSetPendingTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), 
- apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -219,7 +213,6 @@ func TestSetPendingTransportExplicitly(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -232,7 +225,6 @@ func TestSetInvalidTransport(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -277,7 +269,6 @@ func TestEnterBackoffFromHealthy(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -332,7 +323,6 @@ func TestEnterBackoffFromFailing(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -387,7 +377,6 @@ func TestAPMServerRecovery(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -434,7 +423,6 @@ func TestAPMServerAuthFails(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -486,7 +474,6 @@ func TestAPMServerRatelimit(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) assert.Equal(t, apmClient.Status, apmproxy.Started) @@ -540,7 +527,6 @@ func TestAPMServerClientFail(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) assert.Equal(t, apmClient.Status, apmproxy.Started) @@ -591,7 +577,6 @@ func TestContinuedAPMServerFailure(t *testing.T) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) apmClient.UpdateStatus(context.Background(), apmproxy.Healthy) @@ -629,12 +614,10 @@ func TestForwardApmData(t *testing.T) { } agentData := fmt.Sprintf("%s\n%s", metadata, `{"log": {"message": "test"}}`) lambdaData := `{"log": {"message": "test"}}` - metaAvailable := make(chan struct{}) maxBatchAge := 1 * time.Second apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(metaAvailable), 
apmproxy.WithAgentDataBufferSize(10), // Configure a small batch age for ease of testing apmproxy.WithMaxBatchAge(maxBatchAge), @@ -655,14 +638,6 @@ func TestForwardApmData(t *testing.T) { Data: []byte(agentData), } assertGzipBody(agentData) - // After reading agent data with metadata the metadata indicator - // channel should be closed - select { - case _, ok := <-metaAvailable: - require.False(t, ok) - default: - require.Fail(t, "meta channel should be closed after reading agent data") - } // Send lambda logs API data var expected bytes.Buffer @@ -671,7 +646,7 @@ func TestForwardApmData(t *testing.T) { for i := 0; i < 5; i++ { if i == 4 { // Wait for batch age to make sure the batch is mature to be sent - <-time.After(maxBatchAge + time.Millisecond) + time.Sleep(maxBatchAge + time.Millisecond) } apmClient.LambdaDataChannel <- apmproxy.APMData{ Data: []byte(lambdaData), @@ -705,7 +680,6 @@ func BenchmarkFlushAPMData(b *testing.B) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(b).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(b, err) @@ -751,7 +725,6 @@ func BenchmarkPostToAPM(b *testing.B) { apmClient, err := apmproxy.NewClient( apmproxy.WithURL(apmServer.URL), apmproxy.WithLogger(zaptest.NewLogger(b).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(b, err) diff --git a/apmproxy/batch_test.go b/apmproxy/batch_test.go index bf0182ee..2d93d122 100644 --- a/apmproxy/batch_test.go +++ b/apmproxy/batch_test.go @@ -70,9 +70,9 @@ func TestShouldShip_ReasonAge(t *testing.T) { assert.False(t, b.ShouldShip()) require.NoError(t, b.Add(APMData{})) - <-time.After(2 * time.Second) + time.Sleep(time.Second + time.Millisecond) - // Should be ready for send after 3 seconds + // Should be ready to send now require.Equal(t, 1, b.Count()) assert.True(t, b.ShouldShip()) } diff --git a/apmproxy/client.go b/apmproxy/client.go index 6f9d62e5..feccd734 100644 --- a/apmproxy/client.go +++ b/apmproxy/client.go @@ -73,8 +73,6 @@ type Client struct { batch *BatchData maxBatchSize int maxBatchAge time.Duration - - metadataAvailable chan struct{} } func NewClient(opts ...Option) (*Client, error) { @@ -115,10 +113,6 @@ func NewClient(opts ...Option) (*Client, error) { return nil, errors.New("logger cannot be empty") } - if c.metadataAvailable == nil { - return nil, errors.New("metadata available indicator is required") - } - // normalize server URL if !strings.HasSuffix(c.serverURL, "/") { c.serverURL = c.serverURL + "/" diff --git a/apmproxy/client_test.go b/apmproxy/client_test.go index 3acab6db..3f57c73b 100644 --- a/apmproxy/client_test.go +++ b/apmproxy/client_test.go @@ -44,18 +44,10 @@ func TestClient(t *testing.T) { }, expectedErr: true, }, - "missing metadata indicator": { - opts: []apmproxy.Option{ - apmproxy.WithURL("https://example.com"), - apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - }, - expectedErr: true, - }, "valid": { opts: []apmproxy.Option{ apmproxy.WithURL("https://example.com"), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), }, }, } diff --git a/apmproxy/option.go b/apmproxy/option.go index 34622fb1..f1862532 100644 --- a/apmproxy/option.go +++ b/apmproxy/option.go @@ -86,14 +86,6 @@ func WithLogger(logger *zap.SugaredLogger) Option { } } -// WithMetadataAvailableIndicator configures a channel -// which will broadcast metadata available event on close -func 
WithMetadataAvailableIndicator(ch chan struct{}) Option { - return func(c *Client) { - c.metadataAvailable = ch - } -} - // WithMaxBatchSize configures the maximum batch size for // the payload sent to the APMServer func WithMaxBatchSize(size int) Option { diff --git a/apmproxy/receiver_test.go b/apmproxy/receiver_test.go index e908eed6..155ae79a 100644 --- a/apmproxy/receiver_test.go +++ b/apmproxy/receiver_test.go @@ -58,7 +58,6 @@ func TestInfoProxy(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -104,7 +103,6 @@ func TestInfoProxyErrorStatusCode(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -143,7 +141,6 @@ func Test_handleInfoRequest(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) @@ -184,7 +181,6 @@ func Test_handleIntakeV2EventsQueryParam(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.StartReceiver()) @@ -227,7 +223,6 @@ func Test_handleIntakeV2EventsNoQueryParam(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.StartReceiver()) @@ -271,7 +266,6 @@ func Test_handleIntakeV2EventsQueryParamEmptyData(t *testing.T) { apmproxy.WithReceiverAddress(":1234"), apmproxy.WithReceiverTimeout(15*time.Second), apmproxy.WithLogger(zaptest.NewLogger(t).Sugar()), - apmproxy.WithMetadataAvailableIndicator(make(chan struct{})), ) require.NoError(t, err) require.NoError(t, apmClient.StartReceiver()) diff --git a/app/app.go b/app/app.go index 3f928b35..d2b9f808 100644 --- a/app/app.go +++ b/app/app.go @@ -19,7 +19,6 @@ package app import ( "context" - "errors" "fmt" "os" "strconv" @@ -53,10 +52,6 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { opt(&c) } - if c.metadataAvailable == nil { - return nil, errors.New("metadata available indicator is required") - } - app := &App{ extensionName: c.extensionName, } @@ -137,7 +132,6 @@ func New(ctx context.Context, opts ...ConfigOption) (*App, error) { apmproxy.WithLogger(app.logger), apmproxy.WithAPIKey(apmServerAPIKey), apmproxy.WithSecretToken(apmServerSecretToken), - apmproxy.WithMetadataAvailableIndicator(c.metadataAvailable), ) ac, err := apmproxy.NewClient(apmOpts...) 
diff --git a/app/config.go b/app/config.go index c34bcf61..4311de5a 100644 --- a/app/config.go +++ b/app/config.go @@ -27,7 +27,6 @@ type appConfig struct { enableFunctionLogSubscription bool logLevel string logsapiAddr string - metadataAvailable chan struct{} } // ConfigOption is used to configure the lambda extension @@ -86,11 +85,3 @@ func WithAWSConfig(awsConfig aws.Config) ConfigOption { c.awsConfig = awsConfig } } - -// WithMetadataAvailableIndicator configures a channel which should -// be closed when metadata is available after parsing agent data. -func WithMetadataAvailableIndicator(ch chan struct{}) ConfigOption { - return func(c *appConfig) { - c.metadataAvailable = ch - } -} diff --git a/logsapi/event.go b/logsapi/event.go index 9c891512..414bbff5 100644 --- a/logsapi/event.go +++ b/logsapi/event.go @@ -93,7 +93,10 @@ func (lc *Client) ProcessLogs( if err != nil { lc.logger.Errorf("Error processing Lambda platform metrics: %v", err) } else { - apmClient.LambdaDataChannel <- processedMetrics + select { + case apmClient.LambdaDataChannel <- processedMetrics: + case <-ctx.Done(): + } } } else { lc.logger.Warn("report event request id didn't match the previous event id") @@ -111,7 +114,10 @@ func (lc *Client) ProcessLogs( if err != nil { lc.logger.Errorf("Error processing function log : %v", err) } else { - apmClient.LambdaDataChannel <- processedLog + select { + case apmClient.LambdaDataChannel <- processedLog: + case <-ctx.Done(): + } } } case <-ctx.Done(): diff --git a/main.go b/main.go index 0a53c2c3..6e43626e 100644 --- a/main.go +++ b/main.go @@ -45,7 +45,6 @@ func main() { app.WithLambdaRuntimeAPI(os.Getenv("AWS_LAMBDA_RUNTIME_API")), app.WithLogLevel(os.Getenv("ELASTIC_APM_LOG_LEVEL")), app.WithAWSConfig(cfg), - app.WithMetadataAvailableIndicator(make(chan struct{})), } captureLogs, err := strconv.ParseBool(os.Getenv("ELASTIC_APM_LAMBDA_CAPTURE_LOGS")) diff --git a/main_test.go b/main_test.go index 95ae534f..774d759a 100644 --- a/main_test.go +++ b/main_test.go @@ -824,14 +824,11 @@ func TestMetricsWithMetadata(t *testing.T) { func runApp(t *testing.T, logsapiAddr string) <-chan struct{} { ctx, cancel := context.WithCancel(context.Background()) - metadataIndicator := make(chan struct{}) - app, err := app.New(ctx, app.WithExtensionName("apm-lambda-extension"), app.WithLambdaRuntimeAPI(os.Getenv("AWS_LAMBDA_RUNTIME_API")), app.WithLogLevel("debug"), app.WithLogsapiAddress(logsapiAddr), - app.WithMetadataAvailableIndicator(metadataIndicator), ) require.NoError(t, err) From 8892605073ea24ec18efc3a1d93ffab12c9fd9f1 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 19 Oct 2022 15:04:38 +0800 Subject: [PATCH 14/15] Fix context done handling --- apmproxy/apmserver.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apmproxy/apmserver.go b/apmproxy/apmserver.go index f21b9bdb..634e6a2f 100644 --- a/apmproxy/apmserver.go +++ b/apmproxy/apmserver.go @@ -94,13 +94,14 @@ func (c *Client) FlushAPMData(ctx context.Context) { // Flush lambda data for { select { - case <-ctx.Done(): - c.logger.Debugf("Failed to flush completely, may result in data drop") case apmData := <-c.LambdaDataChannel: c.logger.Debug("Flush in progress - Processing lambda data") if err := c.forwardLambdaData(ctx, apmData); err != nil { c.logger.Errorf("Error sending to APM server, skipping: %v", err) } + case <-ctx.Done(): + c.logger.Debugf("Failed to flush completely, may result in data drop") + return default: // Flush any remaining data in batch if err := c.sendBatch(ctx); err != nil { 
From fa41c113909e639b8d2bc9f7f9e23dba8689e44a Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 19 Oct 2022 15:17:52 +0800 Subject: [PATCH 15/15] Add changelog --- CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 69e7ee54..41c18298 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -30,6 +30,7 @@ https://github.com/elastic/apm-aws-lambda/compare/v1.1.0...main[View commits] ===== Features - Disable CGO to prevent libc/ABI compatibility issues {lambda-pull}292[292] - Add support for collecting and shipping function logs to APM Server {lambda-pull}303[303] +- Batch data collected from lambda logs API before sending to APM Server {lambda-pull}314[314] [float] ===== Bug fixes
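
One detail worth calling out for readers of patch 13: the ForwardApmData change gates lambda log data on metadata by leaving the local channel variable nil until the first agent payload arrives, relying on the Go rule that a receive from a nil channel blocks forever and therefore never wins a select. A standalone sketch of that idiom (names and data here are illustrative only, not the extension's code):

package main

import "fmt"

func main() {
	agentCh := make(chan string, 1)
	lambdaCh := make(chan string, 1)
	lambdaCh <- "lambda log event"
	agentCh <- "agent payload carrying metadata"

	// gated starts nil, so its select case can never fire until it is assigned.
	var gated chan string
	for i := 0; i < 2; i++ {
		select {
		case msg := <-agentCh:
			fmt.Println("forwarding:", msg)
			// Metadata is now available; start draining lambda data too.
			gated = lambdaCh
		case msg := <-gated:
			fmt.Println("forwarding:", msg)
		}
	}
}
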