diff --git a/pkg/splunk/enterprise/afwscheduler.go b/pkg/splunk/enterprise/afwscheduler.go index cc99eca0d..2dd2fd667 100644 --- a/pkg/splunk/enterprise/afwscheduler.go +++ b/pkg/splunk/enterprise/afwscheduler.go @@ -766,8 +766,15 @@ func installApp(rctx context.Context, localCtx *localScopePlaybookContext, cr sp streamOptions := splutil.NewStreamOptionsObject(command) stdOut, stdErr, err := localCtx.podExecClient.RunPodExecCommand(rctx, streamOptions, []string{"/bin/sh"}) - // if the app was already installed previously, then just mark it for install complete - if stdErr != "" || err != nil { + + // TODO(patrykw-splunk): remove this once we have confirm that we are not using stderr for error detection at all + // Log stderr content for debugging but don't use it for error detection + if stdErr != "" { + scopedLog.Info("App install command stderr output (informational only)", "stderr", stdErr) + } + + // Check only the actual command execution error, not stderr content + if err != nil { phaseInfo.FailCount++ scopedLog.Error(err, "local scoped app package install failed", "stdout", stdOut, "stderr", stdErr, "app pkg path", appPkgPathOnPod, "failCount", phaseInfo.FailCount) return fmt.Errorf("local scoped app package install failed. stdOut: %s, stdErr: %s, app pkg path: %s, failCount: %d", stdOut, stdErr, appPkgPathOnPod, phaseInfo.FailCount) @@ -785,12 +792,13 @@ func isAppAlreadyInstalled(ctx context.Context, cr splcommon.MetaObject, podExec scopedLog.Info("check app's installation state") - command := fmt.Sprintf("/opt/splunk/bin/splunk list app %s -auth admin:`cat /mnt/splunk-secrets/password`| grep ENABLED; echo -n $?", appTopFolder) + command := fmt.Sprintf("/opt/splunk/bin/splunk list app %s -auth admin:`cat /mnt/splunk-secrets/password`| grep ENABLED", appTopFolder) streamOptions := splutil.NewStreamOptionsObject(command) stdOut, stdErr, err := podExecClient.RunPodExecCommand(ctx, streamOptions, []string{"/bin/sh"}) + // Handle specific stderr cases first if strings.Contains(stdErr, "Could not find object") { // when app is not installed you will see something like on StdErr: // "Could not find object id=" @@ -798,15 +806,37 @@ func isAppAlreadyInstalled(ctx context.Context, cr splcommon.MetaObject, podExec return false, nil } - if stdErr != "" || err != nil { - return false, fmt.Errorf("could not get installed app status stdOut: %s, stdErr: %s, command: %s", stdOut, stdErr, command) + // Log any other stderr content for debugging but don't use it for error detection + if stdErr != "" { + scopedLog.Info("Command stderr output (informational only)", "stderr", stdErr) } - appInstallCheck, _ := strconv.Atoi(stdOut) + // Now check the actual command result + if err != nil { + // The command pipeline ends with 'grep ENABLED', so exit codes follow grep semantics: + // For grep: exit code 1 = pattern not found, exit code 2+ = actual error + errMsg := err.Error() + + // Check for grep exit code 1 (pattern not found) + if strings.Contains(errMsg, "exit status 1") || strings.Contains(errMsg, "command terminated with exit code 1") { + // grep exit code 1 means "ENABLED" pattern not found - app exists but is not enabled + scopedLog.Info("App not enabled - grep pattern not found", "stdout", stdOut, "stderr", stdErr) + return false, nil + } + + // Any other exit code indicates a real error (splunk command failed, etc.) + return false, fmt.Errorf("could not get installed app status stdOut: %s, stdErr: %s, error: %v, command: %s", stdOut, stdErr, err, command) + } - scopedLog.Info("Apps installation state", stdOut, stdOut) + // If we reach here, grep found "ENABLED" (exit code 0) + // stdOut should contain the app status line with "ENABLED" + if stdOut == "" { + // This shouldn't happen if grep succeeded, but let's be safe + return false, fmt.Errorf("command succeeded but no output received, command: %s", command) + } - return appInstallCheck == 0, nil + scopedLog.Info("App installation state check successful - app is enabled", "appStatus", strings.TrimSpace(stdOut)) + return true, nil } // get the name of top folder from the package. @@ -1320,6 +1350,15 @@ installPhase: phaseInfo := getPhaseInfoByPhaseType(ctx, installWorker, enterpriseApi.PhaseInstall) if isPhaseMaxRetriesReached(ctx, phaseInfo, installWorker.afwConfig) { phaseInfo.Status = enterpriseApi.AppPkgInstallError + + // For fanout CRs, also update the main PhaseInfo to reflect the failure + if isFanOutApplicableToCR(installWorker.cr) { + scopedLog.Info("Max retries reached for fanout CR - updating main phase info", "app", installWorker.appDeployInfo.AppName, "failCount", phaseInfo.FailCount) + installWorker.appDeployInfo.PhaseInfo.Phase = enterpriseApi.PhaseInstall + installWorker.appDeployInfo.PhaseInfo.Status = enterpriseApi.AppPkgInstallError + installWorker.appDeployInfo.DeployStatus = enterpriseApi.DeployStatusError + } + ppln.deleteWorkerFromPipelinePhase(ctx, phaseInfo.Phase, installWorker) } else if isPhaseStatusComplete(phaseInfo) { ppln.deleteWorkerFromPipelinePhase(ctx, phaseInfo.Phase, installWorker) diff --git a/pkg/splunk/enterprise/afwscheduler_test.go b/pkg/splunk/enterprise/afwscheduler_test.go index 87c5f2ba8..e3b1f336c 100644 --- a/pkg/splunk/enterprise/afwscheduler_test.go +++ b/pkg/splunk/enterprise/afwscheduler_test.go @@ -3061,18 +3061,23 @@ func TestRunLocalScopedPlaybook(t *testing.T) { t.Errorf("Failed to detect that steps to get installed app failed") } - // Test3: get installed app name passes but getting installed app name failed + // Test3: get installed app name passes but isAppAlreadyInstalled fails with real error (not "Could not find object") mockPodExecReturnContexts[1].StdErr = "" + mockPodExecReturnContexts[2].StdErr = "Some other real error message" // Real error, not "Could not find object" + mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 2") // Real error, not grep exit code 1 localInstallCtxt.sem <- struct{}{} waiter.Add(1) err = localInstallCtxt.runPlaybook(ctx) if err == nil { - t.Errorf("Failed to detect not able to get installed app name: err") + t.Errorf("Failed to detect isAppAlreadyInstalled error") } - // Test4: get installed app command passes but installing app fails - mockPodExecReturnContexts[2].StdOut = "1" //app is not yet installed or it is not enabled - mockPodExecReturnContexts[2].StdErr = "" //no error thrown + // Test4: isAppAlreadyInstalled returns app not enabled (grep exit code 1), then install fails + mockPodExecReturnContexts[2].StdOut = "" // No stdout means grep didn't find ENABLED + mockPodExecReturnContexts[2].StdErr = "" // No stderr + mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 1") // grep exit code 1 = pattern not found + mockPodExecReturnContexts[3].StdErr = "real installation error" // This is just logged now + mockPodExecReturnContexts[3].Err = fmt.Errorf("install command failed") // This causes the actual failure localInstallCtxt.sem <- struct{}{} waiter.Add(1) @@ -3081,28 +3086,35 @@ func TestRunLocalScopedPlaybook(t *testing.T) { t.Errorf("Expected app install failed") } - mockPodExecReturnContexts[2].StdOut = "1" //app is not yet installed or it is not enabled - mockPodExecReturnContexts[2].StdErr = "Could not find object" + // Test5: App not found scenario (Could not find object) - should proceed to install but install fails + mockPodExecReturnContexts[2].StdOut = "" + mockPodExecReturnContexts[2].StdErr = "Could not find object id=app1" + mockPodExecReturnContexts[2].Err = nil // This should return false, nil (app not installed) + // Keep the installation error from previous test to make install fail + mockPodExecReturnContexts[3].StdErr = "real installation error" // Install should fail + mockPodExecReturnContexts[3].Err = fmt.Errorf("install failed") localInstallCtxt.sem <- struct{}{} waiter.Add(1) err = localInstallCtxt.runPlaybook(ctx) if err == nil { - t.Errorf("Expected app install failed") + t.Errorf("Expected app install failed due to installation error") } - // Test5: install app should be successful - - mockPodExecReturnContexts[3].StdErr = "" //no error for app install + // Test6: Install succeeds with stderr content (should be ignored), but cleanup fails + mockPodExecReturnContexts[3].StdErr = "Some informational message in stderr" // Stderr content should be ignored + mockPodExecReturnContexts[3].Err = nil // No actual error for install + // Keep cleanup failure from previous test setup to make overall test fail + // mockPodExecReturnContexts[4] still has error from earlier localInstallCtxt.sem <- struct{}{} waiter.Add(1) err = localInstallCtxt.runPlaybook(ctx) if err == nil { - t.Errorf("Expected app install succeeded but app arhive deletion failed") + t.Errorf("Expected app install succeeded but app archive deletion failed") } - // Test6: successful scenario where everything succeeds + // Test7: successful scenario where everything succeeds mockPodExecReturnContexts[4].StdErr = "" localInstallCtxt.sem <- struct{}{} waiter.Add(1) @@ -3301,19 +3313,24 @@ func TestPremiumAppScopedPlaybook(t *testing.T) { t.Errorf("Failed to detect that steps to get installed app failed") } - // Test3: get installed app name passes but getting installed app name failed + // Test3: get installed app name passes but isAppAlreadyInstalled fails with real error (not "Could not find object") mockPodExecReturnContexts[1].StdErr = "" + mockPodExecReturnContexts[2].StdErr = "Some other real error message" // Real error, not "Could not find object" + mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 2") // Real error, not grep exit code 1 localInstallCtxt.sem <- struct{}{} waiter.Add(1) err = pCtx.runPlaybook(ctx) if err == nil { - t.Errorf("Failed to detect not able to get installed app name: err") + t.Errorf("Failed to detect isAppAlreadyInstalled error") } - // Test4: get installed app command passes, it returns app is not enabled - // so app install will run and it should fail - mockPodExecReturnContexts[2].StdOut = "1" //app is not yet installed or it is not enabled - mockPodExecReturnContexts[2].StdErr = "" //no error thrown + // Test4: isAppAlreadyInstalled returns app is not enabled (grep exit code 1) + // so app install will run and it should fail with real error + mockPodExecReturnContexts[2].StdOut = "" // No stdout means grep didn't find ENABLED + mockPodExecReturnContexts[2].StdErr = "" // No stderr + mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 1") // grep exit code 1 = pattern not found + mockPodExecReturnContexts[3].StdErr = "real installation error" // This is just logged now + mockPodExecReturnContexts[3].Err = fmt.Errorf("install command failed") // This causes the actual failure localInstallCtxt.sem <- struct{}{} waiter.Add(1) @@ -3322,9 +3339,9 @@ func TestPremiumAppScopedPlaybook(t *testing.T) { t.Errorf("Expected app install failed") } - // Test5: install app should be successful but es post install fails - - mockPodExecReturnContexts[3].StdErr = "" //no error for app install + // Test5: Install succeeds with stderr content (should be ignored), but post install fails + mockPodExecReturnContexts[3].StdErr = "Some informational message in stderr" // Stderr content should be ignored + mockPodExecReturnContexts[3].Err = nil // No actual error for install localInstallCtxt.sem <- struct{}{} waiter.Add(1) @@ -3343,7 +3360,24 @@ func TestPremiumAppScopedPlaybook(t *testing.T) { t.Errorf("Expected es post install succeeded but app arhive deletion failed") } - // Test7: successful scenario where everything succeeds + // Test7: App already installed with stderr content - should skip installation + // Reset all mock contexts for this test + mockPodExecReturnContexts[0].Err = nil // File exists check passes + mockPodExecReturnContexts[1].StdErr = "" // Get app name passes + mockPodExecReturnContexts[1].StdOut = "app1" // App name is found + mockPodExecReturnContexts[2].StdOut = "app1 CONFIGURED ENABLED VISIBLE" // App is already enabled + mockPodExecReturnContexts[2].StdErr = "Some informational message in stderr" + mockPodExecReturnContexts[2].Err = nil // No error - app is found and enabled + // Install step should be skipped, but cleanup should still work + mockPodExecReturnContexts[5].StdErr = "" // Cleanup should succeed + localInstallCtxt.sem <- struct{}{} + waiter.Add(1) + err = pCtx.runPlaybook(ctx) + if err != nil { + t.Errorf("runPlayBook should not have returned error when app is already installed with stderr content. err=%s", err.Error()) + } + + // Test8: successful scenario where everything succeeds mockPodExecReturnContexts[5].StdErr = "" localInstallCtxt.sem <- struct{}{} waiter.Add(1) @@ -4462,3 +4496,126 @@ func TestAddTelAppCManager(t *testing.T) { // Negative testing addTelApp(ctx, mockPodExecClient, 2, &crNew) } + +func TestIsAppAlreadyInstalled(t *testing.T) { + ctx := context.TODO() + + tests := []struct { + name string + stdOut string + stdErr string + err error + expectedResult bool + expectedError bool + description string + }{ + { + name: "App is enabled - success case", + stdOut: "myapp CONFIGURED ENABLED VISIBLE", + stdErr: "", + err: nil, + expectedResult: true, + expectedError: false, + description: "App is found and enabled", + }, + { + name: "App not found - grep exit code 1", + stdOut: "", + stdErr: "", + err: fmt.Errorf("command terminated with exit code 1"), + expectedResult: false, + expectedError: false, + description: "App not enabled - grep pattern not found", + }, + { + name: "App not found - Could not find object", + stdOut: "", + stdErr: "Could not find object id=myapp", + err: nil, + expectedResult: false, + expectedError: false, + description: "App not installed at all", + }, + { + name: "App enabled with stderr content", + stdOut: "myapp CONFIGURED ENABLED VISIBLE", + stdErr: "Some informational message in stderr", + err: nil, + expectedResult: true, + expectedError: false, + description: "Stderr content should be ignored when app is enabled", + }, + { + name: "App not enabled with stderr content", + stdOut: "", + stdErr: "Some informational message in stderr", + err: fmt.Errorf("exit status 1"), + expectedResult: false, + expectedError: false, + description: "Stderr content should be ignored, grep exit code 1 means not enabled", + }, + { + name: "Real error - exit code 2", + stdOut: "", + stdErr: "Some real error occurred", + err: fmt.Errorf("exit status 2"), + expectedResult: false, + expectedError: true, + description: "Real error should be returned", + }, + { + name: "Command succeeded but no output", + stdOut: "", + stdErr: "", + err: nil, + expectedResult: false, + expectedError: true, + description: "Should error if command succeeds but no output", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a test CR + cr := &enterpriseApi.Standalone{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-standalone", + Namespace: "test", + }, + } + + // Create mock pod exec client with CR + mockPodExecClient := &spltest.MockPodExecClient{Cr: cr} + mockPodExecClient.SetTargetPodName(ctx, "test-pod") + + // Set up the mock return context + mockReturnContext := &spltest.MockPodExecReturnContext{ + StdOut: tt.stdOut, + StdErr: tt.stdErr, + Err: tt.err, + } + + // Add the mock command and return context - use the exact command pattern + command := "/opt/splunk/bin/splunk list app testapp -auth admin:`cat /mnt/splunk-secrets/password`| grep ENABLED" + mockPodExecClient.AddMockPodExecReturnContexts(ctx, []string{command}, mockReturnContext) + + // Call the function + result, err := isAppAlreadyInstalled(ctx, cr, mockPodExecClient, "testapp") + + // Check results + if tt.expectedError { + if err == nil { + t.Errorf("Expected error but got none for test: %s", tt.description) + } + } else { + if err != nil { + t.Errorf("Unexpected error for test '%s': %v", tt.description, err) + } + } + + if result != tt.expectedResult { + t.Errorf("Expected result %v but got %v for test: %s", tt.expectedResult, result, tt.description) + } + }) + } +}