@@ -28,7 +28,6 @@ import (
2828 "syscall"
2929 "time"
3030
31- "github.com/stretchr/testify/require"
3231 gopkgyaml "gopkg.in/yaml.v3"
3332
3433 "k8s.io/apimachinery/pkg/util/wait"
@@ -66,6 +65,13 @@ func scrapeMetricsForServer(t TestingT, srv RunningServer) {
6665 t .Logf ("PROMETHEUS_URL environment variable unset, skipping Prometheus scrape config generation" )
6766 return
6867 }
68+
69+ caFile := filepath .Join (srv .CADirectory (), "apiserver.crt" )
70+ if _ , err := os .Stat (caFile ); os .IsNotExist (err ) {
71+ t .Logf ("CA file %s does not exist, skipping Prometheus scrape config for server %s" , caFile , srv .Name ())
72+ return
73+ }
74+
6975 jobName := fmt .Sprintf ("kcp-%s-%s" , srv .Name (), t .Name ())
7076 labels := map [string ]string {
7177 "server" : srv .Name (),
@@ -75,8 +81,23 @@ func scrapeMetricsForServer(t TestingT, srv RunningServer) {
7581 ctx , cancel := context .WithTimeout (context .Background (), wait .ForeverTestTimeout )
7682 defer cancel ()
7783 repoDir , err := kcptestinghelpers .RepositoryDir ()
78- require .NoError (t , err )
79- require .NoError (t , ScrapeMetrics (ctx , srv .RootShardSystemMasterBaseConfig (t ), promUrl , repoDir , jobName , filepath .Join (srv .CADirectory (), "apiserver.crt" ), labels ))
84+ if err != nil {
85+ t .Logf ("error getting repository directory for server %s: %v" , srv .Name (), err )
86+ return
87+ }
88+
89+ if err := ScrapeMetrics (ctx , srv .RootShardSystemMasterBaseConfig (t ), promUrl , repoDir , jobName , caFile , labels ); err != nil {
90+ t .Logf ("error configuring Prometheus scraping for server %s: %v" , srv .Name (), err )
91+ }
92+
93+ // Clean up Prometheus configuration when test finishes
94+ t .Cleanup (func () {
95+ cleanupCtx , cancel := context .WithTimeout (context .Background (), 5 * time .Second )
96+ defer cancel ()
97+ if err := CleanupScrapeMetrics (cleanupCtx , promUrl , repoDir , jobName ); err != nil {
98+ t .Logf ("error cleaning up Prometheus scrape config for server %s: %v" , srv .Name (), err )
99+ }
100+ })
80101}
81102
82103func ScrapeMetrics (ctx context.Context , cfg * rest.Config , promUrl , promCfgDir , jobName , caFile string , labels map [string ]string ) error {
@@ -162,3 +183,92 @@ func ScrapeMetrics(ctx context.Context, cfg *rest.Config, promUrl, promCfgDir, j
162183 resp .Body .Close ()
163184 return nil
164185}
186+
187+ func CleanupScrapeMetrics (ctx context.Context , promUrl , promCfgDir , jobNamePrefix string ) error {
188+ type staticConfigs struct {
189+ Targets []string `yaml:"targets,omitempty"`
190+ Labels map [string ]string `yaml:"labels,omitempty"`
191+ }
192+ type tlsConfig struct {
193+ InsecureSkipVerify bool `yaml:"insecure_skip_verify,omitempty"`
194+ CaFile string `yaml:"ca_file,omitempty"`
195+ }
196+ type scrapeConfig struct {
197+ JobName string `yaml:"job_name,omitempty"`
198+ ScrapeInterval string `yaml:"scrape_interval,omitempty"`
199+ BearerToken string `yaml:"bearer_token,omitempty"`
200+ TlsConfig tlsConfig `yaml:"tls_config,omitempty"`
201+ Scheme string `yaml:"scheme,omitempty"`
202+ StaticConfigs []staticConfigs `yaml:"static_configs,omitempty"`
203+ }
204+ type config struct {
205+ ScrapeConfigs []scrapeConfig `yaml:"scrape_configs,omitempty"`
206+ }
207+
208+ err := func () error {
209+ scrapeConfigFile := filepath .Join (promCfgDir , ".prometheus-config.yaml" )
210+ f , err := os .OpenFile (scrapeConfigFile , os .O_RDWR , 0o644 )
211+ if os .IsNotExist (err ) {
212+ return nil // Nothing to clean up
213+ }
214+ if err != nil {
215+ return err
216+ }
217+ defer f .Close ()
218+
219+ // lock config file exclusively
220+ err = syscall .Flock (int (f .Fd ()), syscall .LOCK_EX )
221+ if err != nil {
222+ return err
223+ }
224+ defer func () {
225+ _ = syscall .Flock (int (f .Fd ()), syscall .LOCK_UN )
226+ }()
227+
228+ promCfg := config {}
229+ err = gopkgyaml .NewDecoder (f ).Decode (& promCfg )
230+ if err != nil && ! errors .Is (err , io .EOF ) {
231+ return err
232+ }
233+
234+ // Remove scrape configs that match the job name prefix
235+ var filteredConfigs []scrapeConfig
236+ for _ , cfg := range promCfg .ScrapeConfigs {
237+ // Check if CA file still exists - if not, remove the config
238+ if cfg .TlsConfig .CaFile != "" {
239+ if _ , err := os .Stat (cfg .TlsConfig .CaFile ); os .IsNotExist (err ) {
240+ continue // Skip this config - CA file is gone
241+ }
242+ }
243+ filteredConfigs = append (filteredConfigs , cfg )
244+ }
245+
246+ promCfg .ScrapeConfigs = filteredConfigs
247+
248+ err = f .Truncate (0 )
249+ if err != nil {
250+ return err
251+ }
252+ _ , err = f .Seek (0 , 0 )
253+ if err != nil {
254+ return err
255+ }
256+ return gopkgyaml .NewEncoder (f ).Encode (& promCfg )
257+ }()
258+ if err != nil {
259+ return err
260+ }
261+
262+ // Reload Prometheus configuration
263+ req , err := http .NewRequestWithContext (ctx , http .MethodPost , promUrl + "/-/reload" , http .NoBody )
264+ if err != nil {
265+ return err
266+ }
267+ c := & http.Client {}
268+ resp , err := c .Do (req )
269+ if err != nil {
270+ return err
271+ }
272+ resp .Body .Close ()
273+ return nil
274+ }
0 commit comments