Skip to content

Commit 0aa7334

Browse files
committed
reduce prometheus verbosity
1 parent fa4e118 commit 0aa7334

File tree

1 file changed

+113
-3
lines changed

1 file changed

+113
-3
lines changed

sdk/testing/server/metrics.go

Lines changed: 113 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"syscall"
2929
"time"
3030

31-
"github.com/stretchr/testify/require"
3231
gopkgyaml "gopkg.in/yaml.v3"
3332

3433
"k8s.io/apimachinery/pkg/util/wait"
@@ -66,6 +65,13 @@ func scrapeMetricsForServer(t TestingT, srv RunningServer) {
6665
t.Logf("PROMETHEUS_URL environment variable unset, skipping Prometheus scrape config generation")
6766
return
6867
}
68+
69+
caFile := filepath.Join(srv.CADirectory(), "apiserver.crt")
70+
if _, err := os.Stat(caFile); os.IsNotExist(err) {
71+
t.Logf("CA file %s does not exist, skipping Prometheus scrape config for server %s", caFile, srv.Name())
72+
return
73+
}
74+
6975
jobName := fmt.Sprintf("kcp-%s-%s", srv.Name(), t.Name())
7076
labels := map[string]string{
7177
"server": srv.Name(),
@@ -75,8 +81,23 @@ func scrapeMetricsForServer(t TestingT, srv RunningServer) {
7581
ctx, cancel := context.WithTimeout(context.Background(), wait.ForeverTestTimeout)
7682
defer cancel()
7783
repoDir, err := kcptestinghelpers.RepositoryDir()
78-
require.NoError(t, err)
79-
require.NoError(t, ScrapeMetrics(ctx, srv.RootShardSystemMasterBaseConfig(t), promUrl, repoDir, jobName, filepath.Join(srv.CADirectory(), "apiserver.crt"), labels))
84+
if err != nil {
85+
t.Logf("error getting repository directory for server %s: %v", srv.Name(), err)
86+
return
87+
}
88+
89+
if err := ScrapeMetrics(ctx, srv.RootShardSystemMasterBaseConfig(t), promUrl, repoDir, jobName, caFile, labels); err != nil {
90+
t.Logf("error configuring Prometheus scraping for server %s: %v", srv.Name(), err)
91+
}
92+
93+
// Clean up Prometheus configuration when test finishes
94+
t.Cleanup(func() {
95+
cleanupCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
96+
defer cancel()
97+
if err := CleanupScrapeMetrics(cleanupCtx, promUrl, repoDir, jobName); err != nil {
98+
t.Logf("error cleaning up Prometheus scrape config for server %s: %v", srv.Name(), err)
99+
}
100+
})
80101
}
81102

82103
func ScrapeMetrics(ctx context.Context, cfg *rest.Config, promUrl, promCfgDir, jobName, caFile string, labels map[string]string) error {
@@ -162,3 +183,92 @@ func ScrapeMetrics(ctx context.Context, cfg *rest.Config, promUrl, promCfgDir, j
162183
resp.Body.Close()
163184
return nil
164185
}
186+
187+
func CleanupScrapeMetrics(ctx context.Context, promUrl, promCfgDir, jobNamePrefix string) error {
188+
type staticConfigs struct {
189+
Targets []string `yaml:"targets,omitempty"`
190+
Labels map[string]string `yaml:"labels,omitempty"`
191+
}
192+
type tlsConfig struct {
193+
InsecureSkipVerify bool `yaml:"insecure_skip_verify,omitempty"`
194+
CaFile string `yaml:"ca_file,omitempty"`
195+
}
196+
type scrapeConfig struct {
197+
JobName string `yaml:"job_name,omitempty"`
198+
ScrapeInterval string `yaml:"scrape_interval,omitempty"`
199+
BearerToken string `yaml:"bearer_token,omitempty"`
200+
TlsConfig tlsConfig `yaml:"tls_config,omitempty"`
201+
Scheme string `yaml:"scheme,omitempty"`
202+
StaticConfigs []staticConfigs `yaml:"static_configs,omitempty"`
203+
}
204+
type config struct {
205+
ScrapeConfigs []scrapeConfig `yaml:"scrape_configs,omitempty"`
206+
}
207+
208+
err := func() error {
209+
scrapeConfigFile := filepath.Join(promCfgDir, ".prometheus-config.yaml")
210+
f, err := os.OpenFile(scrapeConfigFile, os.O_RDWR, 0o644)
211+
if os.IsNotExist(err) {
212+
return nil // Nothing to clean up
213+
}
214+
if err != nil {
215+
return err
216+
}
217+
defer f.Close()
218+
219+
// lock config file exclusively
220+
err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
221+
if err != nil {
222+
return err
223+
}
224+
defer func() {
225+
_ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
226+
}()
227+
228+
promCfg := config{}
229+
err = gopkgyaml.NewDecoder(f).Decode(&promCfg)
230+
if err != nil && !errors.Is(err, io.EOF) {
231+
return err
232+
}
233+
234+
// Remove scrape configs that match the job name prefix
235+
var filteredConfigs []scrapeConfig
236+
for _, cfg := range promCfg.ScrapeConfigs {
237+
// Check if CA file still exists - if not, remove the config
238+
if cfg.TlsConfig.CaFile != "" {
239+
if _, err := os.Stat(cfg.TlsConfig.CaFile); os.IsNotExist(err) {
240+
continue // Skip this config - CA file is gone
241+
}
242+
}
243+
filteredConfigs = append(filteredConfigs, cfg)
244+
}
245+
246+
promCfg.ScrapeConfigs = filteredConfigs
247+
248+
err = f.Truncate(0)
249+
if err != nil {
250+
return err
251+
}
252+
_, err = f.Seek(0, 0)
253+
if err != nil {
254+
return err
255+
}
256+
return gopkgyaml.NewEncoder(f).Encode(&promCfg)
257+
}()
258+
if err != nil {
259+
return err
260+
}
261+
262+
// Reload Prometheus configuration
263+
req, err := http.NewRequestWithContext(ctx, http.MethodPost, promUrl+"/-/reload", http.NoBody)
264+
if err != nil {
265+
return err
266+
}
267+
c := &http.Client{}
268+
resp, err := c.Do(req)
269+
if err != nil {
270+
return err
271+
}
272+
resp.Body.Close()
273+
return nil
274+
}

0 commit comments

Comments
 (0)