// Package main implements a small HTTP server that runs nvidia-smi on every
// scrape and exposes the reported GPU readings in the Prometheus text format.
package main

import (
	"bytes"
	"encoding/csv"
	"fmt"
	"log"
	"net/http"
	"os"
	"os/exec"
	"strconv"
	"strings"
)

// formatMetrics renders parsed nvidia-smi CSV records as Prometheus text
// exposition lines.
//
// Each record is expected to hold the GPU name, the GPU index, and then the
// values listed in metricList, in that order (this mirrors the --query-gpu
// argument built in metrics). For every value one line of the form
//
//	<metric>{gpu="<name>[<index>]"} <value>
//
// is produced, where dots in the metric name are replaced by underscores.
// Values that do not parse as a number (for example "[N/A]") are reported as
// -1 so that the output always carries a numeric sample. A single trailing
// "deviceCount" line reports the highest parseable GPU index plus one, or 0
// when there is none.
func formatMetrics(records [][]string) string {
	metricList := []string{
		"temperature.gpu", "utilization.gpu",
		"utilization.memory", "memory.total", "memory.free", "memory.used",
		"eccerrorsaggregate.total", "eccuncorrectedvolatile.total", "power.draw"}

	// The sanitized names do not depend on the row, so compute them once.
	names := make([]string, len(metricList))
	for i, m := range metricList {
		names[i] = strings.Replace(m, ".", "_", -1)
	}

	var b strings.Builder
	deviceCount := 0
	for _, row := range records {
		// A row without both name and index cannot be labelled; skip it
		// instead of panicking on the index expressions below.
		if len(row) < 2 {
			continue
		}
		gpu := fmt.Sprintf("%s[%s]", row[0], row[1])
		for idx, value := range row[2:] {
			if idx >= len(names) {
				// More columns than known metrics: nothing to name them with.
				break
			}
			// Non-numerical values like [N/A] will break Prometheus when a
			// number is expected. Parse as float so that fractional readings
			// such as power.draw are kept instead of being turned into -1.
			sample := "-1"
			if f, err := strconv.ParseFloat(strings.TrimSpace(value), 64); err == nil {
				sample = strconv.FormatFloat(f, 'f', -1, 64)
			}
			fmt.Fprintf(&b, "%s{gpu=\"%s\"} %s\n", names[idx], gpu, sample)
		}
		if id, err := strconv.Atoi(row[1]); err == nil && id+1 > deviceCount {
			deviceCount = id + 1
		}
	}
	// Emitted exactly once; repeating the same unlabelled sample per GPU
	// would produce duplicate series in the scrape.
	fmt.Fprintf(&b, "deviceCount %d\n", deviceCount)
	return b.String()
}

// metrics is the HTTP handler for the metrics endpoint. It queries
// nvidia-smi for name, index, temperature.gpu, utilization.gpu,
// utilization.memory, memory.total, memory.free, memory.used, the two
// uncorrected ECC error counters and power.draw, and writes the formatted
// result to response.
//
// If nvidia-smi cannot be run or its output is not valid CSV, the error is
// logged and the client receives a 500 response rather than an empty 200.
func metrics(response http.ResponseWriter, request *http.Request) {
	// Bind the subprocess to the request so it is killed if the client
	// goes away before nvidia-smi finishes.
	out, err := exec.CommandContext(
		request.Context(),
		"nvidia-smi",
		"--query-gpu=name,index,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used,ecc.errors.uncorrected.aggregate.total,ecc.errors.uncorrected.volatile.total,power.draw",
		"--format=csv,noheader,nounits").Output()
	if err != nil {
		log.Printf("running nvidia-smi: %v", err)
		http.Error(response, "running nvidia-smi: "+err.Error(), http.StatusInternalServerError)
		return
	}

	csvReader := csv.NewReader(bytes.NewReader(out))
	csvReader.TrimLeadingSpace = true
	records, err := csvReader.ReadAll()
	if err != nil {
		log.Printf("parsing nvidia-smi output: %v", err)
		http.Error(response, "parsing nvidia-smi output: "+err.Error(), http.StatusInternalServerError)
		return
	}

	response.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
	// Fprint, not Fprintf: the payload is data and must not be interpreted
	// as a format string.
	if _, err := fmt.Fprint(response, formatMetrics(records)); err != nil {
		log.Printf("writing metrics response: %v", err)
	}
}

// main serves the metrics handler under /metrics/ on port 9101, or on the
// port given as the first command-line argument.
func main() {
	addr := ":9101"
	if len(os.Args) > 1 {
		addr = ":" + os.Args[1]
	}

	http.HandleFunc("/metrics/", metrics)
	err := http.ListenAndServe(addr, nil)
	if err != nil {
		log.Fatal("ListenAndServe: ", err)
	}
}