nightlight: implement promql arithmetic and quantiles

This commit is contained in:
centra 2026-04-01 15:46:11 +09:00
parent bf93c7bb87
commit 9b26deee9b
Signed by: centra
GPG key ID: 0C09689D20B25ACA
2 changed files with 1086 additions and 196 deletions

File diff suppressed because it is too large Load diff

View file

@ -4985,10 +4985,7 @@ validate_nightlight_flow_with_base() {
local metric_value local metric_value
metric_value="$(awk 'BEGIN{srand(); printf "%.3f\n", (rand()*100)+1}')" metric_value="$(awk 'BEGIN{srand(); printf "%.3f\n", (rand()*100)+1}')"
python3 "${REPO_ROOT}/nix/test-cluster/nightlight_remote_write.py" \ nightlight_remote_write_sample "${base_url}" "${metric_name}" "${metric_value}" auto \
--url "${base_url}/write" \
--metric "${metric_name}" \
--value "${metric_value}" \
--label source=smoke \ --label source=smoke \
--label cluster=photoncloud --label cluster=photoncloud
@ -4998,6 +4995,34 @@ validate_nightlight_flow_with_base() {
| jq -e --arg name "${metric_name}" '.status == "success" and (.data | index($name)) != null' >/dev/null | jq -e --arg name "${metric_name}" '.status == "success" and (.data | index($name)) != null' >/dev/null
curl -fsS "${base_url}/series" \ curl -fsS "${base_url}/series" \
| jq -e --arg name "${metric_name}" '.status == "success" and (.data | any(.__name__ == $name))' >/dev/null | jq -e --arg name "${metric_name}" '.status == "success" and (.data | any(.__name__ == $name))' >/dev/null
validate_nightlight_promql_features "${base_url}" "${flow_name}"
}
# Pushes a single sample to NightLight through the remote-write helper script.
#
# Globals:
#   REPO_ROOT (read) - checkout root; the helper is run from
#                      ${REPO_ROOT}/nix/test-cluster/nightlight_remote_write.py
# Arguments:
#   $1 - API base URL; "/write" is appended to form the target URL
#   $2 - metric name
#   $3 - sample value
#   $4 - sample timestamp in milliseconds, or the literal "auto" to omit
#        --timestamp-ms and leave the choice to the helper script
#   $5... - extra arguments forwarded verbatim (e.g. --label key=value)
# Returns:
#   2 when fewer than four arguments are supplied, otherwise the exit status
#   of the helper script.
nightlight_remote_write_sample() {
  # Guard the arity up front: a failing `shift 4` leaves the positionals in
  # place, which would otherwise be forwarded to the helper as bogus options.
  if (($# < 4)); then
    printf '%s\n' \
      "nightlight_remote_write_sample: expected <base_url> <metric> <value> <timestamp_ms|auto> [args...]" >&2
    return 2
  fi

  local base_url="$1"
  local metric_name="$2"
  local metric_value="$3"
  local timestamp_ms="$4"
  shift 4

  # Build the command as an array so values containing spaces survive intact.
  local -a cmd=(
    python3 "${REPO_ROOT}/nix/test-cluster/nightlight_remote_write.py"
    --url "${base_url}/write"
    --metric "${metric_name}"
    --value "${metric_value}"
  )
  if [[ "${timestamp_ms}" != "auto" ]]; then
    cmd+=(--timestamp-ms "${timestamp_ms}")
  fi

  # Whatever is left after the four fixed arguments goes through unchanged.
  cmd+=("$@")

  "${cmd[@]}"
}
wait_for_nightlight_query_result() { wait_for_nightlight_query_result() {
@ -5024,6 +5049,136 @@ wait_for_nightlight_query_result() {
done done
} }
validate_nightlight_promql_features() {
local base_url="$1"
local flow_name="$2"
local suffix="promql_$(date +%s)"
local requests_metric="nightlight_promql_requests_${suffix}"
local errors_metric="nightlight_promql_errors_${suffix}"
local cpu_metric="nightlight_promql_cpu_${suffix}"
local latency_metric="nightlight_promql_latency_${suffix}"
local timestamp_end timestamp_start step_ms
timestamp_end="$(date +%s%3N)"
step_ms=60000
timestamp_start=$((timestamp_end - step_ms))
log "Validating ${flow_name} PromQL arithmetic, grouping, histogram quantile, and range queries"
nightlight_remote_write_sample "${base_url}" "${requests_metric}" 20 "${timestamp_start}" \
--label suite=promql \
--label service=api
nightlight_remote_write_sample "${base_url}" "${requests_metric}" 40 "${timestamp_end}" \
--label suite=promql \
--label service=api
nightlight_remote_write_sample "${base_url}" "${errors_metric}" 4 "${timestamp_start}" \
--label suite=promql \
--label service=api
nightlight_remote_write_sample "${base_url}" "${errors_metric}" 10 "${timestamp_end}" \
--label suite=promql \
--label service=api
nightlight_remote_write_sample "${base_url}" "${cpu_metric}" 2 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=a
nightlight_remote_write_sample "${base_url}" "${cpu_metric}" 3 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=b
nightlight_remote_write_sample "${base_url}" "${latency_metric}" 5 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=a \
--label le=0.1
nightlight_remote_write_sample "${base_url}" "${latency_metric}" 10 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=a \
--label le=0.2
nightlight_remote_write_sample "${base_url}" "${latency_metric}" 20 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=a \
--label le=+Inf
nightlight_remote_write_sample "${base_url}" "${latency_metric}" 5 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=b \
--label le=0.1
nightlight_remote_write_sample "${base_url}" "${latency_metric}" 10 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=b \
--label le=0.2
nightlight_remote_write_sample "${base_url}" "${latency_metric}" 20 "${timestamp_end}" \
--label suite=promql \
--label job=api \
--label instance=b \
--label le=+Inf
wait_for_nightlight_query_result \
"${base_url}" \
"${flow_name} PromQL latest sample" \
"${requests_metric}" \
40 \
"suite=\"promql\",service=\"api\""
curl -fsS --get "${base_url}/query" \
--data-urlencode "query=${errors_metric}{suite=\"promql\",service=\"api\"} / ${requests_metric}{suite=\"promql\",service=\"api\"}" \
| jq -e '
.status == "success"
and .data.resultType == "vector"
and (.data.result | length) == 1
and .data.result[0].metric.service == "api"
and (.data.result[0].metric | has("__name__") | not)
and (.data.result[0].value[1] >= 0.249)
and (.data.result[0].value[1] <= 0.251)
' >/dev/null
curl -fsS --get "${base_url}/query" \
--data-urlencode "query=sum by (job)(${cpu_metric}{suite=\"promql\"})" \
| jq -e '
.status == "success"
and .data.resultType == "vector"
and (.data.result | length) == 1
and .data.result[0].metric.job == "api"
and (.data.result[0].value[1] >= 4.999)
and (.data.result[0].value[1] <= 5.001)
' >/dev/null
curl -fsS --get "${base_url}/query" \
--data-urlencode "query=histogram_quantile(0.5, sum by (job, le)(${latency_metric}{suite=\"promql\",job=\"api\"}))" \
| jq -e '
.status == "success"
and .data.resultType == "vector"
and (.data.result | length) == 1
and .data.result[0].metric.job == "api"
and (.data.result[0].metric | has("le") | not)
and (.data.result[0].value[1] >= 0.199)
and (.data.result[0].value[1] <= 0.201)
' >/dev/null
curl -fsS --get "${base_url}/query_range" \
--data-urlencode "query=${requests_metric}{suite=\"promql\",service=\"api\"} / 2" \
--data-urlencode "start=${timestamp_start}" \
--data-urlencode "end=${timestamp_end}" \
--data-urlencode "step=${step_ms}" \
| jq -e --argjson start "${timestamp_start}" --argjson end "${timestamp_end}" '
.status == "success"
and .data.resultType == "matrix"
and (.data.result | length) == 1
and .data.result[0].metric.service == "api"
and .data.result[0].values[0][0] == $start
and (.data.result[0].values[0][1] >= 9.999)
and (.data.result[0].values[0][1] <= 10.001)
and .data.result[0].values[1][0] == $end
and (.data.result[0].values[1][1] >= 19.999)
and (.data.result[0].values[1][1] <= 20.001)
' >/dev/null
}
validate_nightlight_flow() { validate_nightlight_flow() {
validate_nightlight_flow_with_base "http://127.0.0.1:9090/api/v1" "NightLight" validate_nightlight_flow_with_base "http://127.0.0.1:9090/api/v1" "NightLight"
validate_nightlight_grpc_and_persistence validate_nightlight_grpc_and_persistence