Implement host lifecycle orchestration and distributed storage restructuring
This commit is contained in:
parent
a7d5cfa738
commit
6fa172eab1
124 changed files with 21742 additions and 4016 deletions
552
apigateway/Cargo.lock
generated
552
apigateway/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
434
chainfire/Cargo.lock
generated
434
chainfire/Cargo.lock
generated
|
|
@ -342,6 +342,12 @@ version = "1.0.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg_aliases"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "chainfire-api"
|
name = "chainfire-api"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
@ -471,6 +477,7 @@ dependencies = [
|
||||||
"http-body-util",
|
"http-body-util",
|
||||||
"metrics",
|
"metrics",
|
||||||
"metrics-exporter-prometheus",
|
"metrics-exporter-prometheus",
|
||||||
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
|
@ -786,6 +793,17 @@ dependencies = [
|
||||||
"crypto-common",
|
"crypto-common",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "displaydoc"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dlv-list"
|
name = "dlv-list"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
|
@ -978,8 +996,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
|
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
|
"js-sys",
|
||||||
"libc",
|
"libc",
|
||||||
"wasi",
|
"wasi",
|
||||||
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -989,9 +1009,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
|
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
|
"js-sys",
|
||||||
"libc",
|
"libc",
|
||||||
"r-efi",
|
"r-efi",
|
||||||
"wasip2",
|
"wasip2",
|
||||||
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -1150,6 +1172,7 @@ dependencies = [
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
|
"webpki-roots",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -1171,6 +1194,7 @@ version = "0.1.19"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f"
|
checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"base64 0.22.1",
|
||||||
"bytes",
|
"bytes",
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
|
@ -1178,7 +1202,9 @@ dependencies = [
|
||||||
"http",
|
"http",
|
||||||
"http-body",
|
"http-body",
|
||||||
"hyper",
|
"hyper",
|
||||||
|
"ipnet",
|
||||||
"libc",
|
"libc",
|
||||||
|
"percent-encoding",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"socket2 0.6.1",
|
"socket2 0.6.1",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
|
@ -1210,6 +1236,108 @@ dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_collections"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"potential_utf",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_locale_core"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"litemap",
|
||||||
|
"tinystr",
|
||||||
|
"writeable",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_normalizer"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
|
||||||
|
dependencies = [
|
||||||
|
"icu_collections",
|
||||||
|
"icu_normalizer_data",
|
||||||
|
"icu_properties",
|
||||||
|
"icu_provider",
|
||||||
|
"smallvec",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_normalizer_data"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_properties"
|
||||||
|
version = "2.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
|
||||||
|
dependencies = [
|
||||||
|
"icu_collections",
|
||||||
|
"icu_locale_core",
|
||||||
|
"icu_properties_data",
|
||||||
|
"icu_provider",
|
||||||
|
"zerotrie",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_properties_data"
|
||||||
|
version = "2.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_provider"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"icu_locale_core",
|
||||||
|
"writeable",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerotrie",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "idna"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
|
||||||
|
dependencies = [
|
||||||
|
"idna_adapter",
|
||||||
|
"smallvec",
|
||||||
|
"utf8_iter",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "idna_adapter"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
|
||||||
|
dependencies = [
|
||||||
|
"icu_normalizer",
|
||||||
|
"icu_properties",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "1.9.3"
|
version = "1.9.3"
|
||||||
|
|
@ -1236,6 +1364,16 @@ version = "2.11.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
|
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iri-string"
|
||||||
|
version = "0.7.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "is-terminal"
|
name = "is-terminal"
|
||||||
version = "0.4.17"
|
version = "0.4.17"
|
||||||
|
|
@ -1367,6 +1505,12 @@ version = "0.11.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
|
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "litemap"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
version = "0.4.14"
|
version = "0.4.14"
|
||||||
|
|
@ -1382,6 +1526,12 @@ version = "0.4.29"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lru-slab"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lz4-sys"
|
name = "lz4-sys"
|
||||||
version = "1.11.1+lz4-1.10.0"
|
version = "1.11.1+lz4-1.10.0"
|
||||||
|
|
@ -1730,6 +1880,15 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "potential_utf"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
|
||||||
|
dependencies = [
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ppv-lite86"
|
name = "ppv-lite86"
|
||||||
version = "0.2.21"
|
version = "0.2.21"
|
||||||
|
|
@ -1889,6 +2048,61 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quinn"
|
||||||
|
version = "0.11.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"cfg_aliases",
|
||||||
|
"pin-project-lite",
|
||||||
|
"quinn-proto",
|
||||||
|
"quinn-udp",
|
||||||
|
"rustc-hash",
|
||||||
|
"rustls",
|
||||||
|
"socket2 0.6.1",
|
||||||
|
"thiserror 2.0.17",
|
||||||
|
"tokio",
|
||||||
|
"tracing",
|
||||||
|
"web-time",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quinn-proto"
|
||||||
|
version = "0.11.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"getrandom 0.3.4",
|
||||||
|
"lru-slab",
|
||||||
|
"rand 0.9.2",
|
||||||
|
"ring",
|
||||||
|
"rustc-hash",
|
||||||
|
"rustls",
|
||||||
|
"rustls-pki-types",
|
||||||
|
"slab",
|
||||||
|
"thiserror 2.0.17",
|
||||||
|
"tinyvec",
|
||||||
|
"tracing",
|
||||||
|
"web-time",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quinn-udp"
|
||||||
|
version = "0.5.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
|
||||||
|
dependencies = [
|
||||||
|
"cfg_aliases",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"socket2 0.6.1",
|
||||||
|
"tracing",
|
||||||
|
"windows-sys 0.60.2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.42"
|
version = "1.0.42"
|
||||||
|
|
@ -2030,6 +2244,44 @@ version = "0.8.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
|
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "reqwest"
|
||||||
|
version = "0.12.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
|
||||||
|
dependencies = [
|
||||||
|
"base64 0.22.1",
|
||||||
|
"bytes",
|
||||||
|
"futures-core",
|
||||||
|
"http",
|
||||||
|
"http-body",
|
||||||
|
"http-body-util",
|
||||||
|
"hyper",
|
||||||
|
"hyper-rustls",
|
||||||
|
"hyper-util",
|
||||||
|
"js-sys",
|
||||||
|
"log",
|
||||||
|
"percent-encoding",
|
||||||
|
"pin-project-lite",
|
||||||
|
"quinn",
|
||||||
|
"rustls",
|
||||||
|
"rustls-pki-types",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"serde_urlencoded",
|
||||||
|
"sync_wrapper",
|
||||||
|
"tokio",
|
||||||
|
"tokio-rustls",
|
||||||
|
"tower 0.5.2",
|
||||||
|
"tower-http",
|
||||||
|
"tower-service",
|
||||||
|
"url",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-futures",
|
||||||
|
"web-sys",
|
||||||
|
"webpki-roots",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.17.14"
|
version = "0.17.14"
|
||||||
|
|
@ -2137,6 +2389,7 @@ version = "1.13.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c"
|
checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"web-time",
|
||||||
"zeroize",
|
"zeroize",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -2359,6 +2612,12 @@ dependencies = [
|
||||||
"windows-sys 0.60.2",
|
"windows-sys 0.60.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stable_deref_trait"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
|
|
@ -2387,6 +2646,20 @@ name = "sync_wrapper"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
|
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "synstructure"
|
||||||
|
version = "0.13.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tempfile"
|
name = "tempfile"
|
||||||
|
|
@ -2450,6 +2723,16 @@ dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinystr"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tinytemplate"
|
name = "tinytemplate"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
|
|
@ -2460,6 +2743,21 @@ dependencies = [
|
||||||
"serde_json",
|
"serde_json",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec"
|
||||||
|
version = "1.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
|
||||||
|
dependencies = [
|
||||||
|
"tinyvec_macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec_macros"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
version = "1.48.0"
|
version = "1.48.0"
|
||||||
|
|
@ -2676,9 +2974,12 @@ checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.10.0",
|
"bitflags 2.10.0",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
"futures-util",
|
||||||
"http",
|
"http",
|
||||||
"http-body",
|
"http-body",
|
||||||
|
"iri-string",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
|
"tower 0.5.2",
|
||||||
"tower-layer",
|
"tower-layer",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
"tracing",
|
"tracing",
|
||||||
|
|
@ -2788,6 +3089,24 @@ version = "0.9.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "url"
|
||||||
|
version = "2.5.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
|
||||||
|
dependencies = [
|
||||||
|
"form_urlencoded",
|
||||||
|
"idna",
|
||||||
|
"percent-encoding",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8_iter"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf8parse"
|
name = "utf8parse"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
|
|
@ -2871,6 +3190,19 @@ dependencies = [
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-futures"
|
||||||
|
version = "0.4.56"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"js-sys",
|
||||||
|
"once_cell",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-macro"
|
name = "wasm-bindgen-macro"
|
||||||
version = "0.2.106"
|
version = "0.2.106"
|
||||||
|
|
@ -2913,6 +3245,25 @@ dependencies = [
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "web-time"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
|
||||||
|
dependencies = [
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "webpki-roots"
|
||||||
|
version = "1.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed"
|
||||||
|
dependencies = [
|
||||||
|
"rustls-pki-types",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
|
@ -3174,6 +3525,12 @@ version = "0.46.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
|
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "writeable"
|
||||||
|
version = "0.6.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "yaml-rust"
|
name = "yaml-rust"
|
||||||
version = "0.4.5"
|
version = "0.4.5"
|
||||||
|
|
@ -3183,6 +3540,29 @@ dependencies = [
|
||||||
"linked-hash-map",
|
"linked-hash-map",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yoke"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954"
|
||||||
|
dependencies = [
|
||||||
|
"stable_deref_trait",
|
||||||
|
"yoke-derive",
|
||||||
|
"zerofrom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yoke-derive"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"synstructure",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zerocopy"
|
name = "zerocopy"
|
||||||
version = "0.8.31"
|
version = "0.8.31"
|
||||||
|
|
@ -3203,12 +3583,66 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerofrom"
|
||||||
|
version = "0.1.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
|
||||||
|
dependencies = [
|
||||||
|
"zerofrom-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerofrom-derive"
|
||||||
|
version = "0.1.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"synstructure",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zeroize"
|
name = "zeroize"
|
||||||
version = "1.8.2"
|
version = "1.8.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
|
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerotrie"
|
||||||
|
version = "0.2.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerovec"
|
||||||
|
version = "0.11.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002"
|
||||||
|
dependencies = [
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerovec-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerovec-derive"
|
||||||
|
version = "0.11.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zstd-sys"
|
name = "zstd-sys"
|
||||||
version = "2.0.16+zstd.1.5.7"
|
version = "2.0.16+zstd.1.5.7"
|
||||||
|
|
|
||||||
|
|
@ -18,11 +18,17 @@ use chainfire_proto::proto::{
|
||||||
StatusRequest,
|
StatusRequest,
|
||||||
TxnRequest,
|
TxnRequest,
|
||||||
};
|
};
|
||||||
|
use std::time::Duration;
|
||||||
|
use tonic::Code;
|
||||||
use tonic::transport::Channel;
|
use tonic::transport::Channel;
|
||||||
use tracing::debug;
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
/// Chainfire client
|
/// Chainfire client
|
||||||
pub struct Client {
|
pub struct Client {
|
||||||
|
/// Configured client endpoints
|
||||||
|
endpoints: Vec<String>,
|
||||||
|
/// Preferred endpoint index
|
||||||
|
current_endpoint: usize,
|
||||||
/// gRPC channel
|
/// gRPC channel
|
||||||
channel: Channel,
|
channel: Channel,
|
||||||
/// KV client
|
/// KV client
|
||||||
|
|
@ -34,36 +40,187 @@ pub struct Client {
|
||||||
impl Client {
|
impl Client {
|
||||||
/// Connect to a Chainfire server
|
/// Connect to a Chainfire server
|
||||||
pub async fn connect(addr: impl AsRef<str>) -> Result<Self> {
|
pub async fn connect(addr: impl AsRef<str>) -> Result<Self> {
|
||||||
let addr = addr.as_ref().to_string();
|
let endpoints = parse_endpoints(addr.as_ref())?;
|
||||||
debug!(addr = %addr, "Connecting to Chainfire");
|
let mut last_error = None;
|
||||||
|
|
||||||
let channel = Channel::from_shared(addr)
|
for (index, endpoint) in endpoints.iter().enumerate() {
|
||||||
.map_err(|e| ClientError::Connection(e.to_string()))?
|
match connect_endpoint(endpoint).await {
|
||||||
.connect()
|
Ok((channel, kv, cluster)) => {
|
||||||
.await?;
|
debug!(endpoint = %endpoint, "Connected to Chainfire");
|
||||||
|
let mut client = Self {
|
||||||
|
endpoints: endpoints.clone(),
|
||||||
|
current_endpoint: index,
|
||||||
|
channel,
|
||||||
|
kv,
|
||||||
|
cluster,
|
||||||
|
};
|
||||||
|
client.promote_leader_endpoint().await?;
|
||||||
|
return Ok(client);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
warn!(endpoint = %endpoint, error = %error, "Chainfire endpoint connect failed");
|
||||||
|
last_error = Some(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let kv = KvClient::new(channel.clone());
|
Err(last_error.unwrap_or_else(|| ClientError::Connection("no Chainfire endpoints configured".to_string())))
|
||||||
let cluster = ClusterClient::new(channel.clone());
|
}
|
||||||
|
|
||||||
Ok(Self {
|
async fn with_kv_retry<T, F, Fut>(&mut self, mut op: F) -> Result<T>
|
||||||
channel,
|
where
|
||||||
kv,
|
F: FnMut(KvClient<Channel>) -> Fut,
|
||||||
cluster,
|
Fut: std::future::Future<Output = std::result::Result<T, tonic::Status>>,
|
||||||
})
|
{
|
||||||
|
let max_attempts = self.endpoints.len().max(1) * 3;
|
||||||
|
let mut last_status = None;
|
||||||
|
for attempt in 0..max_attempts {
|
||||||
|
let client = self.kv.clone();
|
||||||
|
match op(client).await {
|
||||||
|
Ok(value) => return Ok(value),
|
||||||
|
Err(status) if attempt + 1 < max_attempts && is_retryable_status(&status) => {
|
||||||
|
warn!(
|
||||||
|
endpoint = %self.endpoints[self.current_endpoint],
|
||||||
|
code = ?status.code(),
|
||||||
|
message = %status.message(),
|
||||||
|
attempt = attempt + 1,
|
||||||
|
max_attempts,
|
||||||
|
"retrying Chainfire KV RPC on alternate endpoint"
|
||||||
|
);
|
||||||
|
last_status = Some(status);
|
||||||
|
self.recover_after_status(last_status.as_ref().unwrap()).await?;
|
||||||
|
tokio::time::sleep(retry_delay(attempt)).await;
|
||||||
|
}
|
||||||
|
Err(status) => return Err(status.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(last_status.unwrap_or_else(|| tonic::Status::unavailable("Chainfire KV retry exhausted")).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn with_cluster_retry<T, F, Fut>(&mut self, mut op: F) -> Result<T>
|
||||||
|
where
|
||||||
|
F: FnMut(ClusterClient<Channel>) -> Fut,
|
||||||
|
Fut: std::future::Future<Output = std::result::Result<T, tonic::Status>>,
|
||||||
|
{
|
||||||
|
let max_attempts = self.endpoints.len().max(1) * 3;
|
||||||
|
let mut last_status = None;
|
||||||
|
for attempt in 0..max_attempts {
|
||||||
|
let client = self.cluster.clone();
|
||||||
|
match op(client).await {
|
||||||
|
Ok(value) => return Ok(value),
|
||||||
|
Err(status) if attempt + 1 < max_attempts && is_retryable_status(&status) => {
|
||||||
|
warn!(
|
||||||
|
endpoint = %self.endpoints[self.current_endpoint],
|
||||||
|
code = ?status.code(),
|
||||||
|
message = %status.message(),
|
||||||
|
attempt = attempt + 1,
|
||||||
|
max_attempts,
|
||||||
|
"retrying Chainfire cluster RPC on alternate endpoint"
|
||||||
|
);
|
||||||
|
last_status = Some(status);
|
||||||
|
self.recover_after_status(last_status.as_ref().unwrap()).await?;
|
||||||
|
tokio::time::sleep(retry_delay(attempt)).await;
|
||||||
|
}
|
||||||
|
Err(status) => return Err(status.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(last_status.unwrap_or_else(|| tonic::Status::unavailable("Chainfire cluster retry exhausted")).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn recover_after_status(&mut self, status: &tonic::Status) -> Result<()> {
|
||||||
|
if let Some(leader_idx) = self.discover_leader_endpoint().await? {
|
||||||
|
if leader_idx != self.current_endpoint {
|
||||||
|
return self.reconnect_to_index(leader_idx).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.endpoints.len() > 1 {
|
||||||
|
let next = (self.current_endpoint + 1) % self.endpoints.len();
|
||||||
|
if next != self.current_endpoint {
|
||||||
|
return self.reconnect_to_index(next).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(ClientError::Rpc(status.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn reconnect_to_index(&mut self, index: usize) -> Result<()> {
|
||||||
|
let endpoint = self
|
||||||
|
.endpoints
|
||||||
|
.get(index)
|
||||||
|
.ok_or_else(|| ClientError::Connection(format!("invalid Chainfire endpoint index {index}")))?
|
||||||
|
.clone();
|
||||||
|
let (channel, kv, cluster) = connect_endpoint(&endpoint).await?;
|
||||||
|
self.current_endpoint = index;
|
||||||
|
self.channel = channel;
|
||||||
|
self.kv = kv;
|
||||||
|
self.cluster = cluster;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn promote_leader_endpoint(&mut self) -> Result<()> {
|
||||||
|
if let Some(index) = self.discover_leader_endpoint().await? {
|
||||||
|
if index != self.current_endpoint {
|
||||||
|
self.reconnect_to_index(index).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn discover_leader_endpoint(&self) -> Result<Option<usize>> {
|
||||||
|
for (index, endpoint) in self.endpoints.iter().enumerate() {
|
||||||
|
let mut cluster = match ClusterClient::connect(endpoint.clone()).await {
|
||||||
|
Ok(client) => client,
|
||||||
|
Err(error) => {
|
||||||
|
warn!(endpoint = %endpoint, error = %error, "failed to connect while probing Chainfire leader");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
match cluster.status(StatusRequest {}).await {
|
||||||
|
Ok(response) => {
|
||||||
|
let status = response.into_inner();
|
||||||
|
let member_id = status.header.as_ref().map(|header| header.member_id).unwrap_or(0);
|
||||||
|
if status.leader != 0 && status.leader == member_id {
|
||||||
|
return Ok(Some(index));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(status) => {
|
||||||
|
warn!(
|
||||||
|
endpoint = %endpoint,
|
||||||
|
code = ?status.code(),
|
||||||
|
message = %status.message(),
|
||||||
|
"failed to query Chainfire leader status"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Put a key-value pair
|
/// Put a key-value pair
|
||||||
pub async fn put(&mut self, key: impl AsRef<[u8]>, value: impl AsRef<[u8]>) -> Result<u64> {
|
pub async fn put(&mut self, key: impl AsRef<[u8]>, value: impl AsRef<[u8]>) -> Result<u64> {
|
||||||
|
let key = key.as_ref().to_vec();
|
||||||
|
let value = value.as_ref().to_vec();
|
||||||
let resp = self
|
let resp = self
|
||||||
.kv
|
.with_kv_retry(|mut kv| {
|
||||||
.put(PutRequest {
|
let key = key.clone();
|
||||||
key: key.as_ref().to_vec(),
|
let value = value.clone();
|
||||||
value: value.as_ref().to_vec(),
|
async move {
|
||||||
lease: 0,
|
kv.put(PutRequest {
|
||||||
prev_kv: false,
|
key,
|
||||||
|
value,
|
||||||
|
lease: 0,
|
||||||
|
prev_kv: false,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
Ok(resp.header.map(|h| h.revision as u64).unwrap_or(0))
|
Ok(resp.header.map(|h| h.revision as u64).unwrap_or(0))
|
||||||
}
|
}
|
||||||
|
|
@ -86,19 +243,25 @@ impl Client {
|
||||||
&mut self,
|
&mut self,
|
||||||
key: impl AsRef<[u8]>,
|
key: impl AsRef<[u8]>,
|
||||||
) -> Result<Option<(Vec<u8>, u64)>> {
|
) -> Result<Option<(Vec<u8>, u64)>> {
|
||||||
|
let key = key.as_ref().to_vec();
|
||||||
let resp = self
|
let resp = self
|
||||||
.kv
|
.with_kv_retry(|mut kv| {
|
||||||
.range(RangeRequest {
|
let key = key.clone();
|
||||||
key: key.as_ref().to_vec(),
|
async move {
|
||||||
range_end: vec![],
|
kv.range(RangeRequest {
|
||||||
limit: 1,
|
key,
|
||||||
revision: 0,
|
range_end: vec![],
|
||||||
keys_only: false,
|
limit: 1,
|
||||||
count_only: false,
|
revision: 0,
|
||||||
serializable: false, // default: linearizable read
|
keys_only: false,
|
||||||
|
count_only: false,
|
||||||
|
serializable: false,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
Ok(resp.kvs.into_iter().next().map(|kv| (kv.value, kv.mod_revision as u64)))
|
Ok(resp.kvs.into_iter().next().map(|kv| (kv.value, kv.mod_revision as u64)))
|
||||||
}
|
}
|
||||||
|
|
@ -132,14 +295,20 @@ impl Client {
|
||||||
})),
|
})),
|
||||||
};
|
};
|
||||||
|
|
||||||
self.kv
|
self.with_kv_retry(|mut kv| {
|
||||||
.txn(TxnRequest {
|
let compare = compare.clone();
|
||||||
compare: vec![compare],
|
let put_op = put_op.clone();
|
||||||
success: vec![put_op],
|
async move {
|
||||||
failure: vec![],
|
kv.txn(TxnRequest {
|
||||||
})
|
compare: vec![compare],
|
||||||
.await?
|
success: vec![put_op],
|
||||||
.into_inner();
|
failure: vec![],
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
@ -152,15 +321,21 @@ impl Client {
|
||||||
|
|
||||||
/// Delete a key
|
/// Delete a key
|
||||||
pub async fn delete(&mut self, key: impl AsRef<[u8]>) -> Result<bool> {
|
pub async fn delete(&mut self, key: impl AsRef<[u8]>) -> Result<bool> {
|
||||||
|
let key = key.as_ref().to_vec();
|
||||||
let resp = self
|
let resp = self
|
||||||
.kv
|
.with_kv_retry(|mut kv| {
|
||||||
.delete(DeleteRangeRequest {
|
let key = key.clone();
|
||||||
key: key.as_ref().to_vec(),
|
async move {
|
||||||
range_end: vec![],
|
kv.delete(DeleteRangeRequest {
|
||||||
prev_kv: false,
|
key,
|
||||||
|
range_end: vec![],
|
||||||
|
prev_kv: false,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
Ok(resp.deleted > 0)
|
Ok(resp.deleted > 0)
|
||||||
}
|
}
|
||||||
|
|
@ -171,18 +346,24 @@ impl Client {
|
||||||
let range_end = prefix_end(prefix);
|
let range_end = prefix_end(prefix);
|
||||||
|
|
||||||
let resp = self
|
let resp = self
|
||||||
.kv
|
.with_kv_retry(|mut kv| {
|
||||||
.range(RangeRequest {
|
let key = prefix.to_vec();
|
||||||
key: prefix.to_vec(),
|
let range_end = range_end.clone();
|
||||||
range_end,
|
async move {
|
||||||
limit: 0,
|
kv.range(RangeRequest {
|
||||||
revision: 0,
|
key,
|
||||||
keys_only: false,
|
range_end,
|
||||||
count_only: false,
|
limit: 0,
|
||||||
serializable: false,
|
revision: 0,
|
||||||
|
keys_only: false,
|
||||||
|
count_only: false,
|
||||||
|
serializable: false,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
Ok(resp.kvs.into_iter().map(|kv| (kv.key, kv.value)).collect())
|
Ok(resp.kvs.into_iter().map(|kv| (kv.key, kv.value)).collect())
|
||||||
}
|
}
|
||||||
|
|
@ -197,18 +378,24 @@ impl Client {
|
||||||
let range_end = prefix_end(prefix);
|
let range_end = prefix_end(prefix);
|
||||||
|
|
||||||
let resp = self
|
let resp = self
|
||||||
.kv
|
.with_kv_retry(|mut kv| {
|
||||||
.range(RangeRequest {
|
let key = prefix.to_vec();
|
||||||
key: prefix.to_vec(),
|
let range_end = range_end.clone();
|
||||||
range_end,
|
async move {
|
||||||
limit,
|
kv.range(RangeRequest {
|
||||||
revision: 0,
|
key,
|
||||||
keys_only: false,
|
range_end,
|
||||||
count_only: false,
|
limit,
|
||||||
serializable: false,
|
revision: 0,
|
||||||
|
keys_only: false,
|
||||||
|
count_only: false,
|
||||||
|
serializable: false,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
let more = resp.more;
|
let more = resp.more;
|
||||||
let kvs: Vec<(Vec<u8>, Vec<u8>, u64)> = resp
|
let kvs: Vec<(Vec<u8>, Vec<u8>, u64)> = resp
|
||||||
|
|
@ -238,18 +425,24 @@ impl Client {
|
||||||
limit: i64,
|
limit: i64,
|
||||||
) -> Result<(Vec<(Vec<u8>, Vec<u8>, u64)>, Option<Vec<u8>>)> {
|
) -> Result<(Vec<(Vec<u8>, Vec<u8>, u64)>, Option<Vec<u8>>)> {
|
||||||
let resp = self
|
let resp = self
|
||||||
.kv
|
.with_kv_retry(|mut kv| {
|
||||||
.range(RangeRequest {
|
let key = start.as_ref().to_vec();
|
||||||
key: start.as_ref().to_vec(),
|
let range_end = end.as_ref().to_vec();
|
||||||
range_end: end.as_ref().to_vec(),
|
async move {
|
||||||
limit,
|
kv.range(RangeRequest {
|
||||||
revision: 0,
|
key,
|
||||||
keys_only: false,
|
range_end,
|
||||||
count_only: false,
|
limit,
|
||||||
serializable: false,
|
revision: 0,
|
||||||
|
keys_only: false,
|
||||||
|
count_only: false,
|
||||||
|
serializable: false,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
let more = resp.more;
|
let more = resp.more;
|
||||||
let kvs: Vec<(Vec<u8>, Vec<u8>, u64)> = resp
|
let kvs: Vec<(Vec<u8>, Vec<u8>, u64)> = resp
|
||||||
|
|
@ -309,14 +502,21 @@ impl Client {
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = self
|
let resp = self
|
||||||
.kv
|
.with_kv_retry(|mut kv| {
|
||||||
.txn(TxnRequest {
|
let compare = compare.clone();
|
||||||
compare: vec![compare],
|
let put_op = put_op.clone();
|
||||||
success: vec![put_op],
|
let read_on_fail = read_on_fail.clone();
|
||||||
failure: vec![read_on_fail],
|
async move {
|
||||||
|
kv.txn(TxnRequest {
|
||||||
|
compare: vec![compare],
|
||||||
|
success: vec![put_op],
|
||||||
|
failure: vec![read_on_fail],
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
if resp.succeeded {
|
if resp.succeeded {
|
||||||
let new_version = resp
|
let new_version = resp
|
||||||
|
|
@ -371,10 +571,13 @@ impl Client {
|
||||||
/// Get cluster status
|
/// Get cluster status
|
||||||
pub async fn status(&mut self) -> Result<ClusterStatus> {
|
pub async fn status(&mut self) -> Result<ClusterStatus> {
|
||||||
let resp = self
|
let resp = self
|
||||||
.cluster
|
.with_cluster_retry(|mut cluster| async move {
|
||||||
.status(StatusRequest {})
|
cluster
|
||||||
.await?
|
.status(StatusRequest {})
|
||||||
.into_inner();
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
Ok(ClusterStatus {
|
Ok(ClusterStatus {
|
||||||
version: resp.version,
|
version: resp.version,
|
||||||
|
|
@ -392,15 +595,22 @@ impl Client {
|
||||||
/// # Returns
|
/// # Returns
|
||||||
/// The node ID of the added member
|
/// The node ID of the added member
|
||||||
pub async fn member_add(&mut self, node_id: u64, peer_url: impl AsRef<str>, is_learner: bool) -> Result<u64> {
|
pub async fn member_add(&mut self, node_id: u64, peer_url: impl AsRef<str>, is_learner: bool) -> Result<u64> {
|
||||||
|
let peer_url = peer_url.as_ref().to_string();
|
||||||
let resp = self
|
let resp = self
|
||||||
.cluster
|
.with_cluster_retry(|mut cluster| {
|
||||||
.member_add(MemberAddRequest {
|
let peer_url = peer_url.clone();
|
||||||
node_id,
|
async move {
|
||||||
peer_urls: vec![peer_url.as_ref().to_string()],
|
cluster
|
||||||
is_learner,
|
.member_add(MemberAddRequest {
|
||||||
|
node_id,
|
||||||
|
peer_urls: vec![peer_url],
|
||||||
|
is_learner,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|resp| resp.into_inner())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await?
|
.await?;
|
||||||
.into_inner();
|
|
||||||
|
|
||||||
// Extract the member ID from the response
|
// Extract the member ID from the response
|
||||||
let member_id = resp
|
let member_id = resp
|
||||||
|
|
@ -410,7 +620,7 @@ impl Client {
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
member_id = member_id,
|
member_id = member_id,
|
||||||
peer_url = peer_url.as_ref(),
|
peer_url = peer_url.as_str(),
|
||||||
is_learner = is_learner,
|
is_learner = is_learner,
|
||||||
"Added member to cluster"
|
"Added member to cluster"
|
||||||
);
|
);
|
||||||
|
|
@ -441,6 +651,64 @@ pub struct CasOutcome {
|
||||||
pub new_version: u64,
|
pub new_version: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_endpoints(input: &str) -> Result<Vec<String>> {
|
||||||
|
let endpoints: Vec<String> = input
|
||||||
|
.split(',')
|
||||||
|
.map(str::trim)
|
||||||
|
.filter(|value| !value.is_empty())
|
||||||
|
.map(normalize_endpoint)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if endpoints.is_empty() {
|
||||||
|
return Err(ClientError::Connection("no Chainfire endpoints configured".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(endpoints)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn normalize_endpoint(endpoint: &str) -> String {
|
||||||
|
if endpoint.contains("://") {
|
||||||
|
endpoint.to_string()
|
||||||
|
} else {
|
||||||
|
format!("http://{endpoint}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn connect_endpoint(endpoint: &str) -> Result<(Channel, KvClient<Channel>, ClusterClient<Channel>)> {
|
||||||
|
let channel = Channel::from_shared(endpoint.to_string())
|
||||||
|
.map_err(|e| ClientError::Connection(e.to_string()))?
|
||||||
|
.connect()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let kv = KvClient::new(channel.clone());
|
||||||
|
let cluster = ClusterClient::new(channel.clone());
|
||||||
|
Ok((channel, kv, cluster))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn retry_delay(attempt: usize) -> Duration {
|
||||||
|
let multiplier = 1u64 << attempt.min(3);
|
||||||
|
Duration::from_millis((200 * multiplier).min(1_000))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_retryable_status(status: &tonic::Status) -> bool {
|
||||||
|
matches!(
|
||||||
|
status.code(),
|
||||||
|
Code::Unavailable | Code::DeadlineExceeded | Code::Internal | Code::Aborted | Code::FailedPrecondition
|
||||||
|
) || retryable_message(status.message())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn retryable_message(message: &str) -> bool {
|
||||||
|
let lowercase = message.to_ascii_lowercase();
|
||||||
|
lowercase.contains("not leader")
|
||||||
|
|| lowercase.contains("leader_id")
|
||||||
|
|| lowercase.contains("transport error")
|
||||||
|
|| lowercase.contains("connection was not ready")
|
||||||
|
|| lowercase.contains("deadline has elapsed")
|
||||||
|
|| lowercase.contains("broken pipe")
|
||||||
|
|| lowercase.contains("connection reset")
|
||||||
|
|| lowercase.contains("connection refused")
|
||||||
|
}
|
||||||
|
|
||||||
/// Calculate prefix end for range queries
|
/// Calculate prefix end for range queries
|
||||||
fn prefix_end(prefix: &[u8]) -> Vec<u8> {
|
fn prefix_end(prefix: &[u8]) -> Vec<u8> {
|
||||||
let mut end = prefix.to_vec();
|
let mut end = prefix.to_vec();
|
||||||
|
|
@ -463,4 +731,30 @@ mod tests {
|
||||||
assert_eq!(prefix_end(b"abc"), b"abd");
|
assert_eq!(prefix_end(b"abc"), b"abd");
|
||||||
assert_eq!(prefix_end(b"/nodes/"), b"/nodes0");
|
assert_eq!(prefix_end(b"/nodes/"), b"/nodes0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn normalize_endpoint_adds_http_scheme() {
|
||||||
|
assert_eq!(normalize_endpoint("127.0.0.1:2379"), "http://127.0.0.1:2379");
|
||||||
|
assert_eq!(normalize_endpoint("http://127.0.0.1:2379"), "http://127.0.0.1:2379");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_endpoints_accepts_comma_separated_values() {
|
||||||
|
let endpoints = parse_endpoints("127.0.0.1:2379, http://127.0.0.2:2379").unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
endpoints,
|
||||||
|
vec![
|
||||||
|
"http://127.0.0.1:2379".to_string(),
|
||||||
|
"http://127.0.0.2:2379".to_string()
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn retryable_message_covers_not_leader_and_transport() {
|
||||||
|
assert!(retryable_message("NotLeader { leader_id: Some(1) }"));
|
||||||
|
assert!(retryable_message("transport error"));
|
||||||
|
assert!(retryable_message("connection was not ready"));
|
||||||
|
assert!(!retryable_message("permission denied"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,17 +27,25 @@ pub struct ClusterServiceImpl {
|
||||||
rpc_client: Arc<crate::GrpcRaftClient>,
|
rpc_client: Arc<crate::GrpcRaftClient>,
|
||||||
/// Cluster ID
|
/// Cluster ID
|
||||||
cluster_id: u64,
|
cluster_id: u64,
|
||||||
|
/// Configured members with client and peer URLs
|
||||||
|
members: Vec<Member>,
|
||||||
/// Server version
|
/// Server version
|
||||||
version: String,
|
version: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ClusterServiceImpl {
|
impl ClusterServiceImpl {
|
||||||
/// Create a new cluster service
|
/// Create a new cluster service
|
||||||
pub fn new(raft: Arc<RaftCore>, rpc_client: Arc<crate::GrpcRaftClient>, cluster_id: u64) -> Self {
|
pub fn new(
|
||||||
|
raft: Arc<RaftCore>,
|
||||||
|
rpc_client: Arc<crate::GrpcRaftClient>,
|
||||||
|
cluster_id: u64,
|
||||||
|
members: Vec<Member>,
|
||||||
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
raft,
|
raft,
|
||||||
rpc_client,
|
rpc_client,
|
||||||
cluster_id,
|
cluster_id,
|
||||||
|
members,
|
||||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -47,16 +55,19 @@ impl ClusterServiceImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get current members as proto Member list
|
/// Get current members as proto Member list
|
||||||
/// NOTE: Custom RaftCore doesn't track membership dynamically yet
|
/// NOTE: Custom RaftCore doesn't track membership dynamically yet, so this returns
|
||||||
|
/// the configured static membership that the server was booted with.
|
||||||
async fn get_member_list(&self) -> Vec<Member> {
|
async fn get_member_list(&self) -> Vec<Member> {
|
||||||
// For now, return only the current node
|
if self.members.is_empty() {
|
||||||
vec![Member {
|
return vec![Member {
|
||||||
id: self.raft.node_id(),
|
id: self.raft.node_id(),
|
||||||
name: format!("node-{}", self.raft.node_id()),
|
name: format!("node-{}", self.raft.node_id()),
|
||||||
peer_urls: vec![],
|
peer_urls: vec![],
|
||||||
client_urls: vec![],
|
client_urls: vec![],
|
||||||
is_learner: false,
|
is_learner: false,
|
||||||
}]
|
}];
|
||||||
|
}
|
||||||
|
self.members.clone()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ http-body-util = { workspace = true }
|
||||||
uuid = { version = "1.11", features = ["v4", "serde"] }
|
uuid = { version = "1.11", features = ["v4", "serde"] }
|
||||||
chrono = { version = "0.4", features = ["serde"] }
|
chrono = { version = "0.4", features = ["serde"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
clap.workspace = true
|
clap.workspace = true
|
||||||
|
|
|
||||||
|
|
@ -11,13 +11,14 @@
|
||||||
use axum::{
|
use axum::{
|
||||||
extract::{Path, Query, State},
|
extract::{Path, Query, State},
|
||||||
http::StatusCode,
|
http::StatusCode,
|
||||||
routing::{delete, get, post, put},
|
routing::{get, post},
|
||||||
Json, Router,
|
Json, Router,
|
||||||
};
|
};
|
||||||
use chainfire_api::GrpcRaftClient;
|
use chainfire_api::GrpcRaftClient;
|
||||||
use chainfire_raft::RaftCore;
|
use chainfire_raft::{core::RaftError, RaftCore};
|
||||||
use chainfire_types::command::RaftCommand;
|
use chainfire_types::command::RaftCommand;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
/// REST API state
|
/// REST API state
|
||||||
|
|
@ -26,16 +27,18 @@ pub struct RestApiState {
|
||||||
pub raft: Arc<RaftCore>,
|
pub raft: Arc<RaftCore>,
|
||||||
pub cluster_id: u64,
|
pub cluster_id: u64,
|
||||||
pub rpc_client: Option<Arc<GrpcRaftClient>>,
|
pub rpc_client: Option<Arc<GrpcRaftClient>>,
|
||||||
|
pub http_client: reqwest::Client,
|
||||||
|
pub peer_http_addrs: Arc<HashMap<u64, String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Standard REST error response
|
/// Standard REST error response
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct ErrorResponse {
|
pub struct ErrorResponse {
|
||||||
pub error: ErrorDetail,
|
pub error: ErrorDetail,
|
||||||
pub meta: ResponseMeta,
|
pub meta: ResponseMeta,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct ErrorDetail {
|
pub struct ErrorDetail {
|
||||||
pub code: String,
|
pub code: String,
|
||||||
pub message: String,
|
pub message: String,
|
||||||
|
|
@ -43,7 +46,7 @@ pub struct ErrorDetail {
|
||||||
pub details: Option<serde_json::Value>,
|
pub details: Option<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct ResponseMeta {
|
pub struct ResponseMeta {
|
||||||
pub request_id: String,
|
pub request_id: String,
|
||||||
pub timestamp: String,
|
pub timestamp: String,
|
||||||
|
|
@ -59,7 +62,7 @@ impl ResponseMeta {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Standard REST success response
|
/// Standard REST success response
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct SuccessResponse<T> {
|
pub struct SuccessResponse<T> {
|
||||||
pub data: T,
|
pub data: T,
|
||||||
pub meta: ResponseMeta,
|
pub meta: ResponseMeta,
|
||||||
|
|
@ -75,25 +78,25 @@ impl<T> SuccessResponse<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// KV Put request body
|
/// KV Put request body
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct PutRequest {
|
pub struct PutRequest {
|
||||||
pub value: String,
|
pub value: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// KV Get response
|
/// KV Get response
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct GetResponse {
|
pub struct GetResponse {
|
||||||
pub key: String,
|
pub key: String,
|
||||||
pub value: String,
|
pub value: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// KV List response
|
/// KV List response
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct ListResponse {
|
pub struct ListResponse {
|
||||||
pub items: Vec<KvItem>,
|
pub items: Vec<KvItem>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct KvItem {
|
pub struct KvItem {
|
||||||
pub key: String,
|
pub key: String,
|
||||||
pub value: String,
|
pub value: String,
|
||||||
|
|
@ -129,6 +132,13 @@ pub struct AddMemberRequestLegacy {
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
pub struct PrefixQuery {
|
pub struct PrefixQuery {
|
||||||
pub prefix: Option<String>,
|
pub prefix: Option<String>,
|
||||||
|
pub consistency: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Query parameters for key reads
|
||||||
|
#[derive(Debug, Default, Deserialize)]
|
||||||
|
pub struct ReadQuery {
|
||||||
|
pub consistency: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build the REST API router
|
/// Build the REST API router
|
||||||
|
|
@ -153,80 +163,11 @@ async fn health_check() -> (StatusCode, Json<SuccessResponse<serde_json::Value>>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// GET /api/v1/kv/{key} - Get value
|
|
||||||
async fn get_kv(
|
|
||||||
State(state): State<RestApiState>,
|
|
||||||
Path(key): Path<String>,
|
|
||||||
) -> Result<Json<SuccessResponse<GetResponse>>, (StatusCode, Json<ErrorResponse>)> {
|
|
||||||
let sm = state.raft.state_machine();
|
|
||||||
let key_bytes = key.as_bytes().to_vec();
|
|
||||||
|
|
||||||
let results = sm.kv()
|
|
||||||
.get(&key_bytes)
|
|
||||||
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
|
||||||
|
|
||||||
let value = results
|
|
||||||
.into_iter()
|
|
||||||
.next()
|
|
||||||
.ok_or_else(|| error_response(StatusCode::NOT_FOUND, "NOT_FOUND", "Key not found"))?;
|
|
||||||
|
|
||||||
Ok(Json(SuccessResponse::new(GetResponse {
|
|
||||||
key,
|
|
||||||
value: String::from_utf8_lossy(&value.value).to_string(),
|
|
||||||
})))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// PUT /api/v1/kv/{key} - Put value
|
|
||||||
async fn put_kv(
|
|
||||||
State(state): State<RestApiState>,
|
|
||||||
Path(key): Path<String>,
|
|
||||||
Json(req): Json<PutRequest>,
|
|
||||||
) -> Result<(StatusCode, Json<SuccessResponse<serde_json::Value>>), (StatusCode, Json<ErrorResponse>)> {
|
|
||||||
let command = RaftCommand::Put {
|
|
||||||
key: key.as_bytes().to_vec(),
|
|
||||||
value: req.value.as_bytes().to_vec(),
|
|
||||||
lease_id: None,
|
|
||||||
prev_kv: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
state
|
|
||||||
.raft
|
|
||||||
.client_write(command)
|
|
||||||
.await
|
|
||||||
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
|
||||||
|
|
||||||
Ok((
|
|
||||||
StatusCode::OK,
|
|
||||||
Json(SuccessResponse::new(serde_json::json!({ "key": key, "success": true }))),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// DELETE /api/v1/kv/{key} - Delete key
|
|
||||||
async fn delete_kv(
|
|
||||||
State(state): State<RestApiState>,
|
|
||||||
Path(key): Path<String>,
|
|
||||||
) -> Result<(StatusCode, Json<SuccessResponse<serde_json::Value>>), (StatusCode, Json<ErrorResponse>)> {
|
|
||||||
let command = RaftCommand::Delete {
|
|
||||||
key: key.as_bytes().to_vec(),
|
|
||||||
prev_kv: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
state
|
|
||||||
.raft
|
|
||||||
.client_write(command)
|
|
||||||
.await
|
|
||||||
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
|
||||||
|
|
||||||
Ok((
|
|
||||||
StatusCode::OK,
|
|
||||||
Json(SuccessResponse::new(serde_json::json!({ "key": key, "success": true }))),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// GET /api/v1/kv/*key - Get value (wildcard for all keys)
|
/// GET /api/v1/kv/*key - Get value (wildcard for all keys)
|
||||||
async fn get_kv_wildcard(
|
async fn get_kv_wildcard(
|
||||||
State(state): State<RestApiState>,
|
State(state): State<RestApiState>,
|
||||||
Path(key): Path<String>,
|
Path(key): Path<String>,
|
||||||
|
Query(query): Query<ReadQuery>,
|
||||||
) -> Result<Json<SuccessResponse<GetResponse>>, (StatusCode, Json<ErrorResponse>)> {
|
) -> Result<Json<SuccessResponse<GetResponse>>, (StatusCode, Json<ErrorResponse>)> {
|
||||||
// Use key as-is for simple keys, prepend / for namespaced keys
|
// Use key as-is for simple keys, prepend / for namespaced keys
|
||||||
// Keys like "testkey" stay as "testkey", keys like "flaredb/stores/1" become "/flaredb/stores/1"
|
// Keys like "testkey" stay as "testkey", keys like "flaredb/stores/1" become "/flaredb/stores/1"
|
||||||
|
|
@ -235,6 +176,14 @@ async fn get_kv_wildcard(
|
||||||
} else {
|
} else {
|
||||||
key.clone()
|
key.clone()
|
||||||
};
|
};
|
||||||
|
if should_proxy_read(query.consistency.as_deref(), &state).await {
|
||||||
|
return proxy_read_to_leader(
|
||||||
|
&state,
|
||||||
|
&format!("/api/v1/kv/{}", full_key.trim_start_matches('/')),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
let sm = state.raft.state_machine();
|
let sm = state.raft.state_machine();
|
||||||
let key_bytes = full_key.as_bytes().to_vec();
|
let key_bytes = full_key.as_bytes().to_vec();
|
||||||
|
|
||||||
|
|
@ -272,11 +221,7 @@ async fn put_kv_wildcard(
|
||||||
prev_kv: false,
|
prev_kv: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
state
|
submit_rest_write(&state, command, Some(&req), &full_key, reqwest::Method::PUT).await?;
|
||||||
.raft
|
|
||||||
.client_write(command)
|
|
||||||
.await
|
|
||||||
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
StatusCode::OK,
|
StatusCode::OK,
|
||||||
|
|
@ -300,11 +245,7 @@ async fn delete_kv_wildcard(
|
||||||
prev_kv: false,
|
prev_kv: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
state
|
submit_rest_write(&state, command, None, &full_key, reqwest::Method::DELETE).await?;
|
||||||
.raft
|
|
||||||
.client_write(command)
|
|
||||||
.await
|
|
||||||
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
StatusCode::OK,
|
StatusCode::OK,
|
||||||
|
|
@ -317,6 +258,13 @@ async fn list_kv(
|
||||||
State(state): State<RestApiState>,
|
State(state): State<RestApiState>,
|
||||||
Query(params): Query<PrefixQuery>,
|
Query(params): Query<PrefixQuery>,
|
||||||
) -> Result<Json<SuccessResponse<ListResponse>>, (StatusCode, Json<ErrorResponse>)> {
|
) -> Result<Json<SuccessResponse<ListResponse>>, (StatusCode, Json<ErrorResponse>)> {
|
||||||
|
if should_proxy_read(params.consistency.as_deref(), &state).await {
|
||||||
|
let query = params
|
||||||
|
.prefix
|
||||||
|
.as_ref()
|
||||||
|
.map(|prefix| vec![("prefix", prefix.as_str())]);
|
||||||
|
return proxy_read_to_leader(&state, "/api/v1/kv", query.as_deref()).await;
|
||||||
|
}
|
||||||
let prefix = params.prefix.unwrap_or_default();
|
let prefix = params.prefix.unwrap_or_default();
|
||||||
let sm = state.raft.state_machine();
|
let sm = state.raft.state_machine();
|
||||||
|
|
||||||
|
|
@ -445,4 +393,170 @@ fn error_response(
|
||||||
meta: ResponseMeta::new(),
|
meta: ResponseMeta::new(),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn submit_rest_write(
|
||||||
|
state: &RestApiState,
|
||||||
|
command: RaftCommand,
|
||||||
|
body: Option<&PutRequest>,
|
||||||
|
key: &str,
|
||||||
|
method: reqwest::Method,
|
||||||
|
) -> Result<(), (StatusCode, Json<ErrorResponse>)> {
|
||||||
|
match state.raft.client_write(command).await {
|
||||||
|
Ok(()) => Ok(()),
|
||||||
|
Err(RaftError::NotLeader { leader_id }) => {
|
||||||
|
let resolved_leader = match leader_id {
|
||||||
|
Some(leader_id) => Some(leader_id),
|
||||||
|
None => state.raft.leader().await,
|
||||||
|
};
|
||||||
|
proxy_write_to_leader(state, resolved_leader, key, method, body).await
|
||||||
|
}
|
||||||
|
Err(err) => Err(error_response(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
"INTERNAL_ERROR",
|
||||||
|
&err.to_string(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn proxy_write_to_leader(
|
||||||
|
state: &RestApiState,
|
||||||
|
leader_id: Option<u64>,
|
||||||
|
key: &str,
|
||||||
|
method: reqwest::Method,
|
||||||
|
body: Option<&PutRequest>,
|
||||||
|
) -> Result<(), (StatusCode, Json<ErrorResponse>)> {
|
||||||
|
let leader_id = leader_id.ok_or_else(|| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
"NOT_LEADER",
|
||||||
|
"current node is not the leader and no leader is known yet",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let leader_http_addr = state.peer_http_addrs.get(&leader_id).ok_or_else(|| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
"NOT_LEADER",
|
||||||
|
&format!("leader {leader_id} is known but has no HTTP endpoint mapping"),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let url = format!(
|
||||||
|
"{}/api/v1/kv/{}",
|
||||||
|
leader_http_addr.trim_end_matches('/'),
|
||||||
|
key.trim_start_matches('/')
|
||||||
|
);
|
||||||
|
let mut request = state.http_client.request(method, &url);
|
||||||
|
if let Some(body) = body {
|
||||||
|
request = request.json(body);
|
||||||
|
}
|
||||||
|
let response = request.send().await.map_err(|err| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::BAD_GATEWAY,
|
||||||
|
"LEADER_PROXY_FAILED",
|
||||||
|
&format!("failed to forward write to leader {leader_id}: {err}"),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
if response.status().is_success() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let status = StatusCode::from_u16(response.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY);
|
||||||
|
let payload = response.json::<ErrorResponse>().await.unwrap_or_else(|err| ErrorResponse {
|
||||||
|
error: ErrorDetail {
|
||||||
|
code: "LEADER_PROXY_FAILED".to_string(),
|
||||||
|
message: format!("leader {leader_id} returned {status}: {err}"),
|
||||||
|
details: None,
|
||||||
|
},
|
||||||
|
meta: ResponseMeta::new(),
|
||||||
|
});
|
||||||
|
Err((status, Json(payload)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn should_proxy_read(consistency: Option<&str>, state: &RestApiState) -> bool {
|
||||||
|
let node_id = state.raft.node_id();
|
||||||
|
let leader_id = state.raft.leader().await;
|
||||||
|
read_requires_leader_proxy(consistency, node_id, leader_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_requires_leader_proxy(
|
||||||
|
consistency: Option<&str>,
|
||||||
|
node_id: u64,
|
||||||
|
leader_id: Option<u64>,
|
||||||
|
) -> bool {
|
||||||
|
if matches!(consistency, Some(mode) if mode.eq_ignore_ascii_case("local")) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
matches!(leader_id, Some(leader_id) if leader_id != node_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn proxy_read_to_leader<T>(
|
||||||
|
state: &RestApiState,
|
||||||
|
path: &str,
|
||||||
|
query: Option<&[(&str, &str)]>,
|
||||||
|
) -> Result<Json<SuccessResponse<T>>, (StatusCode, Json<ErrorResponse>)>
|
||||||
|
where
|
||||||
|
T: for<'de> Deserialize<'de>,
|
||||||
|
{
|
||||||
|
let leader_id = state.raft.leader().await.ok_or_else(|| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
"NOT_LEADER",
|
||||||
|
"current node is not the leader and no leader is known yet",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let leader_http_addr = state.peer_http_addrs.get(&leader_id).ok_or_else(|| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
"NOT_LEADER",
|
||||||
|
&format!("leader {leader_id} is known but has no HTTP endpoint mapping"),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let url = format!(
|
||||||
|
"{}{}",
|
||||||
|
leader_http_addr.trim_end_matches('/'),
|
||||||
|
path
|
||||||
|
);
|
||||||
|
let mut request = state.http_client.get(&url);
|
||||||
|
if let Some(query) = query {
|
||||||
|
request = request.query(query);
|
||||||
|
}
|
||||||
|
let response = request.send().await.map_err(|err| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::BAD_GATEWAY,
|
||||||
|
"LEADER_PROXY_FAILED",
|
||||||
|
&format!("failed to forward read to leader {leader_id}: {err}"),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
if response.status().is_success() {
|
||||||
|
let payload = response.json::<SuccessResponse<T>>().await.map_err(|err| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::BAD_GATEWAY,
|
||||||
|
"LEADER_PROXY_FAILED",
|
||||||
|
&format!("failed to decode leader {leader_id} response: {err}"),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
return Ok(Json(payload));
|
||||||
|
}
|
||||||
|
let status = StatusCode::from_u16(response.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY);
|
||||||
|
let payload = response.json::<ErrorResponse>().await.unwrap_or_else(|err| ErrorResponse {
|
||||||
|
error: ErrorDetail {
|
||||||
|
code: "LEADER_PROXY_FAILED".to_string(),
|
||||||
|
message: format!("leader {leader_id} returned {status}: {err}"),
|
||||||
|
details: None,
|
||||||
|
},
|
||||||
|
meta: ResponseMeta::new(),
|
||||||
|
});
|
||||||
|
Err((status, Json(payload)))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn read_requires_leader_proxy_defaults_to_leader_consistency() {
|
||||||
|
assert!(read_requires_leader_proxy(None, 2, Some(1)));
|
||||||
|
assert!(!read_requires_leader_proxy(Some("local"), 2, Some(1)));
|
||||||
|
assert!(!read_requires_leader_proxy(None, 2, Some(2)));
|
||||||
|
assert!(!read_requires_leader_proxy(None, 2, None));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,10 +11,11 @@ use crate::rest::{build_router, RestApiState};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use chainfire_api::internal_proto::raft_service_server::RaftServiceServer;
|
use chainfire_api::internal_proto::raft_service_server::RaftServiceServer;
|
||||||
use chainfire_api::proto::{
|
use chainfire_api::proto::{
|
||||||
cluster_server::ClusterServer, kv_server::KvServer, watch_server::WatchServer,
|
cluster_server::ClusterServer, kv_server::KvServer, watch_server::WatchServer, Member,
|
||||||
};
|
};
|
||||||
use chainfire_api::{ClusterServiceImpl, KvServiceImpl, RaftServiceImpl, WatchServiceImpl};
|
use chainfire_api::{ClusterServiceImpl, KvServiceImpl, RaftServiceImpl, WatchServiceImpl};
|
||||||
use chainfire_types::RaftRole;
|
use chainfire_types::RaftRole;
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::signal;
|
use tokio::signal;
|
||||||
use tonic::transport::{Certificate, Identity, Server as TonicServer, ServerTlsConfig};
|
use tonic::transport::{Certificate, Identity, Server as TonicServer, ServerTlsConfig};
|
||||||
|
|
@ -109,6 +110,7 @@ impl Server {
|
||||||
Arc::clone(&raft),
|
Arc::clone(&raft),
|
||||||
rpc_client,
|
rpc_client,
|
||||||
self.node.cluster_id(),
|
self.node.cluster_id(),
|
||||||
|
configured_members(&self.config),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Internal Raft service for inter-node communication
|
// Internal Raft service for inter-node communication
|
||||||
|
|
@ -166,10 +168,24 @@ impl Server {
|
||||||
|
|
||||||
// HTTP REST API server
|
// HTTP REST API server
|
||||||
let http_addr = self.config.network.http_addr;
|
let http_addr = self.config.network.http_addr;
|
||||||
|
let http_port = self.config.network.http_addr.port();
|
||||||
|
let peer_http_addrs = Arc::new(
|
||||||
|
self.config
|
||||||
|
.cluster
|
||||||
|
.initial_members
|
||||||
|
.iter()
|
||||||
|
.filter_map(|member| {
|
||||||
|
http_endpoint_from_raft_addr(&member.raft_addr, http_port)
|
||||||
|
.map(|http_addr| (member.id, http_addr))
|
||||||
|
})
|
||||||
|
.collect::<HashMap<_, _>>(),
|
||||||
|
);
|
||||||
let rest_state = RestApiState {
|
let rest_state = RestApiState {
|
||||||
raft: Arc::clone(&raft),
|
raft: Arc::clone(&raft),
|
||||||
cluster_id: self.node.cluster_id(),
|
cluster_id: self.node.cluster_id(),
|
||||||
rpc_client: self.node.rpc_client().cloned(),
|
rpc_client: self.node.rpc_client().cloned(),
|
||||||
|
http_client: reqwest::Client::new(),
|
||||||
|
peer_http_addrs,
|
||||||
};
|
};
|
||||||
let rest_app = build_router(rest_state);
|
let rest_app = build_router(rest_state);
|
||||||
let http_listener = tokio::net::TcpListener::bind(&http_addr).await?;
|
let http_listener = tokio::net::TcpListener::bind(&http_addr).await?;
|
||||||
|
|
@ -286,3 +302,45 @@ impl Server {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn http_endpoint_from_raft_addr(raft_addr: &str, http_port: u16) -> Option<String> {
|
||||||
|
if let Ok(addr) = raft_addr.parse::<std::net::SocketAddr>() {
|
||||||
|
return Some(format!("http://{}:{}", addr.ip(), http_port));
|
||||||
|
}
|
||||||
|
let (host, _) = raft_addr.rsplit_once(':')?;
|
||||||
|
Some(format!("http://{}:{}", host, http_port))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn grpc_endpoint_from_raft_addr(raft_addr: &str, api_port: u16) -> Option<String> {
|
||||||
|
if let Ok(addr) = raft_addr.parse::<std::net::SocketAddr>() {
|
||||||
|
return Some(format!("http://{}:{}", addr.ip(), api_port));
|
||||||
|
}
|
||||||
|
let (host, _) = raft_addr.rsplit_once(':')?;
|
||||||
|
Some(format!("http://{}:{}", host, api_port))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn normalize_peer_url(raft_addr: &str) -> String {
|
||||||
|
if raft_addr.contains("://") {
|
||||||
|
raft_addr.to_string()
|
||||||
|
} else {
|
||||||
|
format!("http://{raft_addr}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn configured_members(config: &ServerConfig) -> Vec<Member> {
|
||||||
|
let api_port = config.network.api_addr.port();
|
||||||
|
config
|
||||||
|
.cluster
|
||||||
|
.initial_members
|
||||||
|
.iter()
|
||||||
|
.map(|member| Member {
|
||||||
|
id: member.id,
|
||||||
|
name: format!("node-{}", member.id),
|
||||||
|
peer_urls: vec![normalize_peer_url(&member.raft_addr)],
|
||||||
|
client_urls: grpc_endpoint_from_raft_addr(&member.raft_addr, api_port)
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
is_learner: false,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
|
||||||
1114
coronafs/Cargo.lock
generated
1114
coronafs/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -24,6 +24,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
|
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
|
||||||
|
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||||
|
|
||||||
[workspace.lints.rust]
|
[workspace.lints.rust]
|
||||||
unsafe_code = "deny"
|
unsafe_code = "deny"
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,11 @@ tracing-subscriber = { workspace = true }
|
||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
chrono = { workspace = true }
|
chrono = { workspace = true }
|
||||||
|
reqwest = { workspace = true }
|
||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
tempfile = "3"
|
||||||
|
|
||||||
[lints]
|
[lints]
|
||||||
workspace = true
|
workspace = true
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,40 @@ use serde::{Deserialize, Serialize};
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum ServerMode {
|
||||||
|
Combined,
|
||||||
|
Controller,
|
||||||
|
Node,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ServerMode {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Combined
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum MetadataBackend {
|
||||||
|
Filesystem,
|
||||||
|
Chainfire,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for MetadataBackend {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Filesystem
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub struct ServerConfig {
|
pub struct ServerConfig {
|
||||||
|
pub mode: ServerMode,
|
||||||
|
pub metadata_backend: MetadataBackend,
|
||||||
|
pub chainfire_api_url: Option<String>,
|
||||||
|
pub chainfire_key_prefix: String,
|
||||||
pub listen_addr: SocketAddr,
|
pub listen_addr: SocketAddr,
|
||||||
pub advertise_host: String,
|
pub advertise_host: String,
|
||||||
pub data_dir: PathBuf,
|
pub data_dir: PathBuf,
|
||||||
|
|
@ -26,6 +57,10 @@ pub struct ServerConfig {
|
||||||
impl Default for ServerConfig {
|
impl Default for ServerConfig {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
mode: ServerMode::Combined,
|
||||||
|
metadata_backend: MetadataBackend::Filesystem,
|
||||||
|
chainfire_api_url: None,
|
||||||
|
chainfire_key_prefix: "/coronafs/volumes".to_string(),
|
||||||
listen_addr: "0.0.0.0:50088".parse().expect("valid listen addr"),
|
listen_addr: "0.0.0.0:50088".parse().expect("valid listen addr"),
|
||||||
advertise_host: "127.0.0.1".to_string(),
|
advertise_host: "127.0.0.1".to_string(),
|
||||||
data_dir: PathBuf::from("/var/lib/coronafs"),
|
data_dir: PathBuf::from("/var/lib/coronafs"),
|
||||||
|
|
@ -34,7 +69,7 @@ impl Default for ServerConfig {
|
||||||
export_port_count: 512,
|
export_port_count: 512,
|
||||||
export_shared_clients: 32,
|
export_shared_clients: 32,
|
||||||
export_cache_mode: "none".to_string(),
|
export_cache_mode: "none".to_string(),
|
||||||
export_aio_mode: "io_uring".to_string(),
|
export_aio_mode: "threads".to_string(),
|
||||||
export_discard_mode: "unmap".to_string(),
|
export_discard_mode: "unmap".to_string(),
|
||||||
export_detect_zeroes_mode: "unmap".to_string(),
|
export_detect_zeroes_mode: "unmap".to_string(),
|
||||||
preallocate: true,
|
preallocate: true,
|
||||||
|
|
@ -47,6 +82,14 @@ impl Default for ServerConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ServerConfig {
|
impl ServerConfig {
|
||||||
|
pub fn supports_controller_api(&self) -> bool {
|
||||||
|
matches!(self.mode, ServerMode::Combined | ServerMode::Controller)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn supports_node_api(&self) -> bool {
|
||||||
|
matches!(self.mode, ServerMode::Combined | ServerMode::Node)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn volume_dir(&self) -> PathBuf {
|
pub fn volume_dir(&self) -> PathBuf {
|
||||||
self.data_dir.join("volumes")
|
self.data_dir.join("volumes")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
231
coronafs/scripts/benchmark-local-export.sh
Executable file
231
coronafs/scripts/benchmark-local-export.sh
Executable file
|
|
@ -0,0 +1,231 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
||||||
|
|
||||||
|
require_cmd() {
|
||||||
|
command -v "$1" >/dev/null 2>&1 || {
|
||||||
|
echo "missing required command: $1" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for cmd in curl qemu-io; do
|
||||||
|
require_cmd "${cmd}"
|
||||||
|
done
|
||||||
|
|
||||||
|
if ! command -v jq >/dev/null 2>&1 && ! command -v python3 >/dev/null 2>&1; then
|
||||||
|
echo "missing required command: jq or python3" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
json_get() {
|
||||||
|
local query="$1"
|
||||||
|
if command -v jq >/dev/null 2>&1; then
|
||||||
|
jq -r "${query}"
|
||||||
|
else
|
||||||
|
python3 -c 'import json,sys
|
||||||
|
data=json.load(sys.stdin)
|
||||||
|
value=data
|
||||||
|
for part in sys.argv[1].split("."):
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
value=value.get(part) if isinstance(value, dict) else None
|
||||||
|
if value is None:
|
||||||
|
break
|
||||||
|
print("" if value is None else value)
|
||||||
|
' "${query}"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
RUN_ID="${CORONAFS_BENCH_RUN_ID:-$$}"
|
||||||
|
LISTEN_PORT="${CORONAFS_BENCH_PORT:-$((25088 + (RUN_ID % 1000)))}"
|
||||||
|
EXPORT_BASE_PORT="${CORONAFS_BENCH_EXPORT_BASE_PORT:-$((26100 + (RUN_ID % 1000)))}"
|
||||||
|
VOLUME_ID="${CORONAFS_BENCH_VOLUME_ID:-local-bench-${RUN_ID}}"
|
||||||
|
SIZE_MIB="${CORONAFS_BENCH_SIZE_MIB:-${CORONAFS_BENCH_SIZE_MB:-512}}"
|
||||||
|
SIZE_BYTES="${CORONAFS_BENCH_SIZE_BYTES:-$((SIZE_MIB * 1024 * 1024))}"
|
||||||
|
WORKLOAD_MIB="${CORONAFS_BENCH_WORKLOAD_MIB:-${CORONAFS_BENCH_WORKLOAD_MB:-256}}"
|
||||||
|
EXPORT_CACHE_MODE="${CORONAFS_BENCH_EXPORT_CACHE_MODE:-none}"
|
||||||
|
EXPORT_AIO_MODE="${CORONAFS_BENCH_EXPORT_AIO_MODE:-threads}"
|
||||||
|
EXPORT_DISCARD_MODE="${CORONAFS_BENCH_EXPORT_DISCARD_MODE:-ignore}"
|
||||||
|
EXPORT_DETECT_ZEROES_MODE="${CORONAFS_BENCH_EXPORT_DETECT_ZEROES_MODE:-off}"
|
||||||
|
SERVER_BIN="${CORONAFS_SERVER_BIN:-}"
|
||||||
|
|
||||||
|
if (( WORKLOAD_MIB > SIZE_MIB )); then
|
||||||
|
echo "workload ${WORKLOAD_MIB} MiB exceeds volume size ${SIZE_MIB} MiB" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ -z "${SERVER_BIN}" ]]; then
|
||||||
|
SERVER_CMD=(
|
||||||
|
cargo run
|
||||||
|
--manifest-path "${REPO_ROOT}/coronafs/Cargo.toml"
|
||||||
|
-p coronafs-server
|
||||||
|
--
|
||||||
|
)
|
||||||
|
else
|
||||||
|
SERVER_CMD=("${SERVER_BIN}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
TMP_DIR="$(mktemp -d)"
|
||||||
|
CONFIG_PATH="${TMP_DIR}/coronafs.toml"
|
||||||
|
SERVER_LOG="${TMP_DIR}/coronafs.log"
|
||||||
|
SERVER_PID=""
|
||||||
|
|
||||||
|
show_server_log() {
|
||||||
|
if [[ -f "${SERVER_LOG}" ]]; then
|
||||||
|
echo "--- coronafs server log ---" >&2
|
||||||
|
tail -n 200 "${SERVER_LOG}" >&2 || true
|
||||||
|
echo "--- end coronafs server log ---" >&2
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
delete_volume_if_present() {
|
||||||
|
curl -fsS -X DELETE "http://127.0.0.1:${LISTEN_PORT}/v1/volumes/${VOLUME_ID}" >/dev/null 2>&1 || true
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
delete_volume_if_present
|
||||||
|
local pid_file="${TMP_DIR}/data/pids/${VOLUME_ID}.pid"
|
||||||
|
if [[ -f "${pid_file}" ]]; then
|
||||||
|
local export_pid=""
|
||||||
|
export_pid="$(tr -d '\n' <"${pid_file}" 2>/dev/null || true)"
|
||||||
|
if [[ -n "${export_pid}" ]] && kill -0 "${export_pid}" 2>/dev/null; then
|
||||||
|
kill "${export_pid}" >/dev/null 2>&1 || true
|
||||||
|
wait "${export_pid}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
rm -f "${pid_file}"
|
||||||
|
fi
|
||||||
|
if [[ -n "${SERVER_PID}" ]] && kill -0 "${SERVER_PID}" 2>/dev/null; then
|
||||||
|
kill "${SERVER_PID}" >/dev/null 2>&1 || true
|
||||||
|
wait "${SERVER_PID}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
rm -rf "${TMP_DIR}"
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
cat >"${CONFIG_PATH}" <<EOF
|
||||||
|
listen_addr = "127.0.0.1:${LISTEN_PORT}"
|
||||||
|
advertise_host = "127.0.0.1"
|
||||||
|
data_dir = "${TMP_DIR}/data"
|
||||||
|
export_bind_addr = "127.0.0.1"
|
||||||
|
export_base_port = ${EXPORT_BASE_PORT}
|
||||||
|
export_port_count = 8
|
||||||
|
export_shared_clients = 32
|
||||||
|
export_cache_mode = "${EXPORT_CACHE_MODE}"
|
||||||
|
export_aio_mode = "${EXPORT_AIO_MODE}"
|
||||||
|
export_discard_mode = "${EXPORT_DISCARD_MODE}"
|
||||||
|
export_detect_zeroes_mode = "${EXPORT_DETECT_ZEROES_MODE}"
|
||||||
|
preallocate = false
|
||||||
|
sync_on_write = false
|
||||||
|
log_level = "info"
|
||||||
|
EOF
|
||||||
|
|
||||||
|
"${SERVER_CMD[@]}" --config "${CONFIG_PATH}" >"${SERVER_LOG}" 2>&1 &
|
||||||
|
SERVER_PID="$!"
|
||||||
|
|
||||||
|
deadline=$((SECONDS + 60))
|
||||||
|
until curl -fsS "http://127.0.0.1:${LISTEN_PORT}/healthz" >/dev/null 2>&1; do
|
||||||
|
if (( SECONDS >= deadline )); then
|
||||||
|
echo "timed out waiting for coronafs local bench server" >&2
|
||||||
|
tail -n 200 "${SERVER_LOG}" >&2 || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
|
||||||
|
create_response_file="${TMP_DIR}/create-response.txt"
|
||||||
|
create_status="$(
|
||||||
|
curl -sS \
|
||||||
|
-o "${create_response_file}" \
|
||||||
|
-w '%{http_code}' \
|
||||||
|
-X PUT \
|
||||||
|
-H 'content-type: application/json' \
|
||||||
|
-d "{\"size_bytes\":${SIZE_BYTES}}" \
|
||||||
|
"http://127.0.0.1:${LISTEN_PORT}/v1/volumes/${VOLUME_ID}"
|
||||||
|
)"
|
||||||
|
if [[ "${create_status}" -lt 200 || "${create_status}" -ge 300 ]]; then
|
||||||
|
echo "failed to create CoronaFS benchmark volume: HTTP ${create_status}" >&2
|
||||||
|
cat "${create_response_file}" >&2 || true
|
||||||
|
show_server_log
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
export_response_file="${TMP_DIR}/export-response.txt"
|
||||||
|
export_status="$(
|
||||||
|
curl -sS \
|
||||||
|
-o "${export_response_file}" \
|
||||||
|
-w '%{http_code}' \
|
||||||
|
-X POST \
|
||||||
|
"http://127.0.0.1:${LISTEN_PORT}/v1/volumes/${VOLUME_ID}/export"
|
||||||
|
)"
|
||||||
|
if [[ "${export_status}" -lt 200 || "${export_status}" -ge 300 ]]; then
|
||||||
|
echo "failed to export CoronaFS benchmark volume: HTTP ${export_status}" >&2
|
||||||
|
cat "${export_response_file}" >&2 || true
|
||||||
|
show_server_log
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
EXPORT_JSON="$(cat "${export_response_file}")"
|
||||||
|
EXPORT_URI="$(printf '%s' "${EXPORT_JSON}" | json_get '.export.uri')"
|
||||||
|
[[ -n "${EXPORT_URI}" && "${EXPORT_URI}" != "null" ]] || {
|
||||||
|
echo "failed to obtain CoronaFS export URI" >&2
|
||||||
|
printf '%s\n' "${EXPORT_JSON}" >&2
|
||||||
|
show_server_log
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
run_qemu_io() {
|
||||||
|
local extra=()
|
||||||
|
local start_ns end_ns elapsed_ns
|
||||||
|
local args=("$@")
|
||||||
|
local cmd=()
|
||||||
|
local qemu_cmd=""
|
||||||
|
|
||||||
|
if [[ "${#args[@]}" -eq 0 ]]; then
|
||||||
|
echo "run_qemu_io requires at least one qemu-io command" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
while [[ "${#args[@]}" -gt 0 && "${args[0]}" == --* ]]; do
|
||||||
|
extra+=("${args[0]}")
|
||||||
|
args=("${args[@]:1}")
|
||||||
|
done
|
||||||
|
|
||||||
|
cmd=(qemu-io -f raw "${extra[@]}")
|
||||||
|
for qemu_cmd in "${args[@]}"; do
|
||||||
|
cmd+=(-c "${qemu_cmd}")
|
||||||
|
done
|
||||||
|
cmd+=("${EXPORT_URI}")
|
||||||
|
|
||||||
|
start_ns="$(date +%s%N)"
|
||||||
|
"${cmd[@]}" >/dev/null
|
||||||
|
end_ns="$(date +%s%N)"
|
||||||
|
elapsed_ns="$((end_ns - start_ns))"
|
||||||
|
printf '%s\n' "${elapsed_ns}"
|
||||||
|
}
|
||||||
|
|
||||||
|
calc_mib_per_s() {
|
||||||
|
local bytes="$1"
|
||||||
|
local elapsed_ns="$2"
|
||||||
|
awk -v bytes="${bytes}" -v elapsed_ns="${elapsed_ns}" '
|
||||||
|
BEGIN {
|
||||||
|
if (elapsed_ns <= 0) {
|
||||||
|
print "0.00"
|
||||||
|
} else {
|
||||||
|
printf "%.2f", (bytes / 1048576.0) / (elapsed_ns / 1000000000.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
}
|
||||||
|
|
||||||
|
BYTES="$((WORKLOAD_MIB * 1024 * 1024))"
|
||||||
|
WRITE_NS="$(run_qemu_io "write -P 0x5a 0 ${WORKLOAD_MIB}M" "flush")"
|
||||||
|
READ_NS="$(run_qemu_io "read -P 0x5a 0 ${WORKLOAD_MIB}M")"
|
||||||
|
WRITE_MIBPS="$(calc_mib_per_s "${BYTES}" "${WRITE_NS}")"
|
||||||
|
READ_MIBPS="$(calc_mib_per_s "${BYTES}" "${READ_NS}")"
|
||||||
|
|
||||||
|
printf 'CoronaFS local export bench: uri=%s cache=%s aio=%s write=%s MiB/s read=%s MiB/s size=%s MiB\n' \
|
||||||
|
"${EXPORT_URI}" "${EXPORT_CACHE_MODE}" "${EXPORT_AIO_MODE}" "${WRITE_MIBPS}" "${READ_MIBPS}" "${WORKLOAD_MIB}"
|
||||||
|
|
||||||
|
printf '%s\t%s\t%s\t%s\t%s\n' "${EXPORT_URI}" "${EXPORT_CACHE_MODE}" "${EXPORT_AIO_MODE}" "${WRITE_MIBPS}" "${READ_MIBPS}"
|
||||||
561
creditservice/Cargo.lock
generated
561
creditservice/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
525
deployer/Cargo.lock
generated
525
deployer/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -12,8 +12,11 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
serde_yaml = "0.9"
|
serde_yaml = "0.9"
|
||||||
|
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
|
||||||
|
|
||||||
chainfire-client = { path = "../../../chainfire/chainfire-client" }
|
chainfire-client = { path = "../../../chainfire/chainfire-client" }
|
||||||
deployer-types = { path = "../deployer-types" }
|
deployer-types = { path = "../deployer-types" }
|
||||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
|
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
axum = { version = "0.7", features = ["macros"] }
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,12 @@ use std::path::Path;
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use chainfire_client::{Client, ClientError};
|
use chainfire_client::{Client, ClientError};
|
||||||
use deployer_types::{ClusterStateSpec, DesiredSystemSpec, InstallPlan, NodeConfig, NodeSpec};
|
use chrono::Utc;
|
||||||
|
use deployer_types::{
|
||||||
|
ClusterNodeRecord, ClusterStateSpec, CommissionState, DesiredSystemSpec, HostDeploymentSpec,
|
||||||
|
HostDeploymentStatus, InstallPlan, InstallState, NodeConfig, NodeSpec, ObservedSystemState,
|
||||||
|
PowerState,
|
||||||
|
};
|
||||||
use serde::de::DeserializeOwned;
|
use serde::de::DeserializeOwned;
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
|
|
@ -49,6 +54,56 @@ fn key_desired_system(cluster_namespace: &str, cluster_id: &str, node_id: &str)
|
||||||
.into_bytes()
|
.into_bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn key_observed_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}nodes/{}/observed-system",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
node_id
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_host_deployment_spec(
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
deployment_name: &str,
|
||||||
|
) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}deployments/hosts/{}/spec",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
deployment_name
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_host_deployment_status(
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
deployment_name: &str,
|
||||||
|
) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}deployments/hosts/{}/status",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
deployment_name
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_commission_state(value: &str) -> Result<CommissionState> {
|
||||||
|
serde_json::from_str(&format!("\"{value}\""))
|
||||||
|
.with_context(|| format!("invalid commission state {value}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_install_state(value: &str) -> Result<InstallState> {
|
||||||
|
serde_json::from_str(&format!("\"{value}\""))
|
||||||
|
.with_context(|| format!("invalid install state {value}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_power_state(value: &str) -> Result<PowerState> {
|
||||||
|
serde_json::from_str(&format!("\"{value}\""))
|
||||||
|
.with_context(|| format!("invalid power state {value}"))
|
||||||
|
}
|
||||||
|
|
||||||
fn key_node_class(cluster_namespace: &str, cluster_id: &str, node_class: &str) -> Vec<u8> {
|
fn key_node_class(cluster_namespace: &str, cluster_id: &str, node_class: &str) -> Vec<u8> {
|
||||||
format!(
|
format!(
|
||||||
"{}node-classes/{}",
|
"{}node-classes/{}",
|
||||||
|
|
@ -178,6 +233,9 @@ fn desired_system_from_spec(node: &NodeSpec) -> Option<DesiredSystemSpec> {
|
||||||
if desired.rollback_on_failure.is_none() {
|
if desired.rollback_on_failure.is_none() {
|
||||||
desired.rollback_on_failure = Some(true);
|
desired.rollback_on_failure = Some(true);
|
||||||
}
|
}
|
||||||
|
if desired.drain_before_apply.is_none() {
|
||||||
|
desired.drain_before_apply = Some(false);
|
||||||
|
}
|
||||||
if desired.nixos_configuration.is_some() {
|
if desired.nixos_configuration.is_some() {
|
||||||
Some(desired)
|
Some(desired)
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -322,6 +380,30 @@ async fn merge_existing_node_observed_fields(
|
||||||
if merged.state.is_none() {
|
if merged.state.is_none() {
|
||||||
merged.state = existing_node.state;
|
merged.state = existing_node.state;
|
||||||
}
|
}
|
||||||
|
if merged.machine_id.is_none() {
|
||||||
|
merged.machine_id = existing_node.machine_id;
|
||||||
|
}
|
||||||
|
if merged.hardware_facts.is_none() {
|
||||||
|
merged.hardware_facts = existing_node.hardware_facts;
|
||||||
|
}
|
||||||
|
if merged.commission_state.is_none() {
|
||||||
|
merged.commission_state = existing_node.commission_state;
|
||||||
|
}
|
||||||
|
if merged.install_state.is_none() {
|
||||||
|
merged.install_state = existing_node.install_state;
|
||||||
|
}
|
||||||
|
if merged.commissioned_at.is_none() {
|
||||||
|
merged.commissioned_at = existing_node.commissioned_at;
|
||||||
|
}
|
||||||
|
if merged.last_inventory_hash.is_none() {
|
||||||
|
merged.last_inventory_hash = existing_node.last_inventory_hash;
|
||||||
|
}
|
||||||
|
if merged.power_state.is_none() {
|
||||||
|
merged.power_state = existing_node.power_state;
|
||||||
|
}
|
||||||
|
if merged.bmc_ref.is_none() {
|
||||||
|
merged.bmc_ref = existing_node.bmc_ref;
|
||||||
|
}
|
||||||
if merged.last_heartbeat.is_none() {
|
if merged.last_heartbeat.is_none() {
|
||||||
merged.last_heartbeat = existing_node.last_heartbeat;
|
merged.last_heartbeat = existing_node.last_heartbeat;
|
||||||
}
|
}
|
||||||
|
|
@ -521,6 +603,13 @@ pub async fn bootstrap_cluster(
|
||||||
info!(enrollment_rule = %rule.name, "upserted enrollment rule");
|
info!(enrollment_rule = %rule.name, "upserted enrollment rule");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for deployment in &spec.host_deployments {
|
||||||
|
let key = key_host_deployment_spec(cluster_namespace, cluster_id, &deployment.name);
|
||||||
|
let value = serde_json::to_vec(deployment)?;
|
||||||
|
client.put(&key, &value).await?;
|
||||||
|
info!(deployment = %deployment.name, "upserted host deployment");
|
||||||
|
}
|
||||||
|
|
||||||
// 3. Service / Instance (必要であれば)
|
// 3. Service / Instance (必要であれば)
|
||||||
for svc in &spec.services {
|
for svc in &spec.services {
|
||||||
let key = key_service(cluster_namespace, cluster_id, &svc.name);
|
let key = key_service(cluster_namespace, cluster_id, &svc.name);
|
||||||
|
|
@ -627,6 +716,11 @@ pub async fn apply_cluster_state(
|
||||||
let value = serde_json::to_vec(rule)?;
|
let value = serde_json::to_vec(rule)?;
|
||||||
client.put(&key, &value).await?;
|
client.put(&key, &value).await?;
|
||||||
}
|
}
|
||||||
|
for deployment in &spec.host_deployments {
|
||||||
|
let key = key_host_deployment_spec(cluster_namespace, cluster_id, &deployment.name);
|
||||||
|
let value = serde_json::to_vec(deployment)?;
|
||||||
|
client.put(&key, &value).await?;
|
||||||
|
}
|
||||||
for svc in &spec.services {
|
for svc in &spec.services {
|
||||||
let key = key_service(cluster_namespace, cluster_id, &svc.name);
|
let key = key_service(cluster_namespace, cluster_id, &svc.name);
|
||||||
let value = serde_json::to_vec(svc)?;
|
let value = serde_json::to_vec(svc)?;
|
||||||
|
|
@ -706,6 +800,421 @@ pub async fn dump_prefix(endpoint: &str, prefix: &str, json_output: bool) -> Res
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_json_key<T: DeserializeOwned>(client: &mut Client, key: &[u8]) -> Result<Option<T>> {
|
||||||
|
client
|
||||||
|
.get(key)
|
||||||
|
.await?
|
||||||
|
.map(|bytes| serde_json::from_slice::<T>(&bytes))
|
||||||
|
.transpose()
|
||||||
|
.with_context(|| format!("failed to decode key {}", String::from_utf8_lossy(key)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn inspect_node(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
node_id: &str,
|
||||||
|
include_desired_system: bool,
|
||||||
|
include_observed_system: bool,
|
||||||
|
json_output: bool,
|
||||||
|
) -> Result<()> {
|
||||||
|
let endpoints = chainfire_endpoints(endpoint);
|
||||||
|
with_chainfire_endpoint_failover(&endpoints, "inspect node", |endpoint| {
|
||||||
|
let endpoint = endpoint.to_string();
|
||||||
|
let cluster_namespace = cluster_namespace.to_string();
|
||||||
|
let cluster_id = cluster_id.to_string();
|
||||||
|
let node_id = node_id.to_string();
|
||||||
|
async move {
|
||||||
|
let mut client = Client::connect(endpoint).await?;
|
||||||
|
let node = get_json_key::<ClusterNodeRecord>(
|
||||||
|
&mut client,
|
||||||
|
&key_node(&cluster_namespace, &cluster_id, &node_id),
|
||||||
|
)
|
||||||
|
.await?
|
||||||
|
.with_context(|| format!("node {} not found", node_id))?;
|
||||||
|
|
||||||
|
let desired_system = if include_desired_system {
|
||||||
|
get_json_key::<DesiredSystemSpec>(
|
||||||
|
&mut client,
|
||||||
|
&key_desired_system(&cluster_namespace, &cluster_id, &node_id),
|
||||||
|
)
|
||||||
|
.await?
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let observed_system = if include_observed_system {
|
||||||
|
get_json_key::<ObservedSystemState>(
|
||||||
|
&mut client,
|
||||||
|
&key_observed_system(&cluster_namespace, &cluster_id, &node_id),
|
||||||
|
)
|
||||||
|
.await?
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
if json_output {
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
serde_json::to_string_pretty(&json!({
|
||||||
|
"node": node,
|
||||||
|
"desired_system": desired_system,
|
||||||
|
"observed_system": observed_system,
|
||||||
|
}))?
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
println!("node_id={}", node.node_id);
|
||||||
|
println!("hostname={}", node.hostname);
|
||||||
|
println!("ip={}", node.ip);
|
||||||
|
println!("state={}", node.state.as_deref().unwrap_or("unknown"));
|
||||||
|
println!(
|
||||||
|
"commission_state={}",
|
||||||
|
node.commission_state
|
||||||
|
.map(|value| serde_json::to_string(&value).unwrap_or_default())
|
||||||
|
.unwrap_or_else(|| "\"unknown\"".to_string())
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"install_state={}",
|
||||||
|
node.install_state
|
||||||
|
.map(|value| serde_json::to_string(&value).unwrap_or_default())
|
||||||
|
.unwrap_or_else(|| "\"unknown\"".to_string())
|
||||||
|
);
|
||||||
|
if let Some(observed_system) = observed_system {
|
||||||
|
println!(
|
||||||
|
"observed_status={}",
|
||||||
|
observed_system.status.unwrap_or_else(|| "unknown".to_string())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn set_node_states(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
node_id: &str,
|
||||||
|
state: Option<String>,
|
||||||
|
commission_state: Option<String>,
|
||||||
|
install_state: Option<String>,
|
||||||
|
power_state: Option<String>,
|
||||||
|
bmc_ref: Option<String>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let endpoints = chainfire_endpoints(endpoint);
|
||||||
|
with_chainfire_endpoint_failover(&endpoints, "set node state", |endpoint| {
|
||||||
|
let endpoint = endpoint.to_string();
|
||||||
|
let cluster_namespace = cluster_namespace.to_string();
|
||||||
|
let cluster_id = cluster_id.to_string();
|
||||||
|
let node_id = node_id.to_string();
|
||||||
|
let state = state.clone();
|
||||||
|
let commission_state = commission_state.clone();
|
||||||
|
let install_state = install_state.clone();
|
||||||
|
let power_state = power_state.clone();
|
||||||
|
let bmc_ref = bmc_ref.clone();
|
||||||
|
async move {
|
||||||
|
let mut client = Client::connect(endpoint).await?;
|
||||||
|
let key = key_node(&cluster_namespace, &cluster_id, &node_id);
|
||||||
|
let mut node = get_json_key::<ClusterNodeRecord>(&mut client, &key)
|
||||||
|
.await?
|
||||||
|
.with_context(|| format!("node {} not found", node_id))?;
|
||||||
|
|
||||||
|
if let Some(state) = state {
|
||||||
|
node.state = Some(state);
|
||||||
|
}
|
||||||
|
if let Some(commission_state) = commission_state {
|
||||||
|
let parsed = parse_commission_state(&commission_state)?;
|
||||||
|
if matches!(parsed, CommissionState::Commissioned) && node.commissioned_at.is_none()
|
||||||
|
{
|
||||||
|
node.commissioned_at = Some(Utc::now());
|
||||||
|
}
|
||||||
|
node.commission_state = Some(parsed);
|
||||||
|
}
|
||||||
|
if let Some(install_state) = install_state {
|
||||||
|
node.install_state = Some(parse_install_state(&install_state)?);
|
||||||
|
}
|
||||||
|
if let Some(power_state) = power_state {
|
||||||
|
node.power_state = Some(parse_power_state(&power_state)?);
|
||||||
|
}
|
||||||
|
if let Some(bmc_ref) = bmc_ref {
|
||||||
|
node.bmc_ref = Some(bmc_ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
client.put(&key, &serde_json::to_vec(&node)?).await?;
|
||||||
|
println!("{}", serde_json::to_string_pretty(&node)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
pub async fn set_observed_system(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
node_id: &str,
|
||||||
|
status: Option<String>,
|
||||||
|
nixos_configuration: Option<String>,
|
||||||
|
target_system: Option<String>,
|
||||||
|
current_system: Option<String>,
|
||||||
|
configured_system: Option<String>,
|
||||||
|
booted_system: Option<String>,
|
||||||
|
rollback_system: Option<String>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let endpoints = chainfire_endpoints(endpoint);
|
||||||
|
with_chainfire_endpoint_failover(&endpoints, "set observed system", |endpoint| {
|
||||||
|
let endpoint = endpoint.to_string();
|
||||||
|
let cluster_namespace = cluster_namespace.to_string();
|
||||||
|
let cluster_id = cluster_id.to_string();
|
||||||
|
let node_id = node_id.to_string();
|
||||||
|
let status = status.clone();
|
||||||
|
let nixos_configuration = nixos_configuration.clone();
|
||||||
|
let target_system = target_system.clone();
|
||||||
|
let current_system = current_system.clone();
|
||||||
|
let configured_system = configured_system.clone();
|
||||||
|
let booted_system = booted_system.clone();
|
||||||
|
let rollback_system = rollback_system.clone();
|
||||||
|
async move {
|
||||||
|
let mut client = Client::connect(endpoint).await?;
|
||||||
|
let key = key_observed_system(&cluster_namespace, &cluster_id, &node_id);
|
||||||
|
let mut observed = get_json_key::<ObservedSystemState>(&mut client, &key)
|
||||||
|
.await?
|
||||||
|
.unwrap_or_else(|| ObservedSystemState {
|
||||||
|
node_id: node_id.clone(),
|
||||||
|
..ObservedSystemState::default()
|
||||||
|
});
|
||||||
|
|
||||||
|
observed.node_id = node_id.clone();
|
||||||
|
if let Some(status) = status {
|
||||||
|
observed.status = Some(status);
|
||||||
|
}
|
||||||
|
if let Some(nixos_configuration) = nixos_configuration {
|
||||||
|
observed.nixos_configuration = Some(nixos_configuration);
|
||||||
|
}
|
||||||
|
if let Some(target_system) = target_system {
|
||||||
|
observed.target_system = Some(target_system);
|
||||||
|
}
|
||||||
|
if let Some(current_system) = current_system {
|
||||||
|
observed.current_system = Some(current_system);
|
||||||
|
}
|
||||||
|
if let Some(configured_system) = configured_system {
|
||||||
|
observed.configured_system = Some(configured_system);
|
||||||
|
}
|
||||||
|
if let Some(booted_system) = booted_system {
|
||||||
|
observed.booted_system = Some(booted_system);
|
||||||
|
}
|
||||||
|
if let Some(rollback_system) = rollback_system {
|
||||||
|
observed.rollback_system = Some(rollback_system);
|
||||||
|
}
|
||||||
|
|
||||||
|
client.put(&key, &serde_json::to_vec(&observed)?).await?;
|
||||||
|
println!("{}", serde_json::to_string_pretty(&observed)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn inspect_host_deployment(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
deployment_name: &str,
|
||||||
|
json_output: bool,
|
||||||
|
) -> Result<()> {
|
||||||
|
let endpoints = chainfire_endpoints(endpoint);
|
||||||
|
with_chainfire_endpoint_failover(&endpoints, "inspect host deployment", |endpoint| {
|
||||||
|
let endpoint = endpoint.to_string();
|
||||||
|
let cluster_namespace = cluster_namespace.to_string();
|
||||||
|
let cluster_id = cluster_id.to_string();
|
||||||
|
let deployment_name = deployment_name.to_string();
|
||||||
|
async move {
|
||||||
|
let mut client = Client::connect(endpoint).await?;
|
||||||
|
let spec = get_json_key::<HostDeploymentSpec>(
|
||||||
|
&mut client,
|
||||||
|
&key_host_deployment_spec(&cluster_namespace, &cluster_id, &deployment_name),
|
||||||
|
)
|
||||||
|
.await?
|
||||||
|
.with_context(|| format!("host deployment {} not found", deployment_name))?;
|
||||||
|
let status = get_json_key::<HostDeploymentStatus>(
|
||||||
|
&mut client,
|
||||||
|
&key_host_deployment_status(&cluster_namespace, &cluster_id, &deployment_name),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if json_output {
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
serde_json::to_string_pretty(&json!({
|
||||||
|
"spec": spec,
|
||||||
|
"status": status,
|
||||||
|
}))?
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
println!("name={}", spec.name);
|
||||||
|
println!(
|
||||||
|
"nixos_configuration={}",
|
||||||
|
spec.nixos_configuration.as_deref().unwrap_or("unknown")
|
||||||
|
);
|
||||||
|
if let Some(status) = status {
|
||||||
|
println!("phase={}", status.phase.as_deref().unwrap_or("unknown"));
|
||||||
|
println!("paused={}", status.paused);
|
||||||
|
println!("selected_nodes={}", status.selected_nodes.join(","));
|
||||||
|
println!("completed_nodes={}", status.completed_nodes.join(","));
|
||||||
|
println!("failed_nodes={}", status.failed_nodes.join(","));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn set_host_deployment_paused(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
deployment_name: &str,
|
||||||
|
paused: bool,
|
||||||
|
) -> Result<()> {
|
||||||
|
let endpoints = chainfire_endpoints(endpoint);
|
||||||
|
with_chainfire_endpoint_failover(&endpoints, "set host deployment pause state", |endpoint| {
|
||||||
|
let endpoint = endpoint.to_string();
|
||||||
|
let cluster_namespace = cluster_namespace.to_string();
|
||||||
|
let cluster_id = cluster_id.to_string();
|
||||||
|
let deployment_name = deployment_name.to_string();
|
||||||
|
async move {
|
||||||
|
let mut client = Client::connect(endpoint).await?;
|
||||||
|
let spec_key = key_host_deployment_spec(&cluster_namespace, &cluster_id, &deployment_name);
|
||||||
|
if client.get(&spec_key).await?.is_none() {
|
||||||
|
return Err(anyhow::anyhow!(
|
||||||
|
"host deployment {} not found",
|
||||||
|
deployment_name
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let status_key =
|
||||||
|
key_host_deployment_status(&cluster_namespace, &cluster_id, &deployment_name);
|
||||||
|
let mut status = get_json_key::<HostDeploymentStatus>(&mut client, &status_key)
|
||||||
|
.await?
|
||||||
|
.unwrap_or_else(|| HostDeploymentStatus {
|
||||||
|
name: deployment_name.clone(),
|
||||||
|
..HostDeploymentStatus::default()
|
||||||
|
});
|
||||||
|
status.name = deployment_name.clone();
|
||||||
|
status.paused_by_operator = paused;
|
||||||
|
status.paused = paused;
|
||||||
|
status.phase = Some(if paused { "paused" } else { "ready" }.to_string());
|
||||||
|
status.message = Some(if paused {
|
||||||
|
"paused by operator".to_string()
|
||||||
|
} else {
|
||||||
|
"resumed by operator".to_string()
|
||||||
|
});
|
||||||
|
status.updated_at = Some(Utc::now());
|
||||||
|
client.put(&status_key, &serde_json::to_vec(&status)?).await?;
|
||||||
|
println!("{}", serde_json::to_string_pretty(&status)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn abort_host_deployment(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
deployment_name: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
let endpoints = chainfire_endpoints(endpoint);
|
||||||
|
with_chainfire_endpoint_failover(&endpoints, "abort host deployment", |endpoint| {
|
||||||
|
let endpoint = endpoint.to_string();
|
||||||
|
let cluster_namespace = cluster_namespace.to_string();
|
||||||
|
let cluster_id = cluster_id.to_string();
|
||||||
|
let deployment_name = deployment_name.to_string();
|
||||||
|
async move {
|
||||||
|
let mut client = Client::connect(endpoint).await?;
|
||||||
|
let spec_key = key_host_deployment_spec(&cluster_namespace, &cluster_id, &deployment_name);
|
||||||
|
if client.get(&spec_key).await?.is_none() {
|
||||||
|
return Err(anyhow::anyhow!(
|
||||||
|
"host deployment {} not found",
|
||||||
|
deployment_name
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let node_prefix = format!("{}nodes/", cluster_prefix(&cluster_namespace, &cluster_id));
|
||||||
|
let existing = client.get_prefix(node_prefix.as_bytes()).await?;
|
||||||
|
let mut cleared_nodes = Vec::new();
|
||||||
|
|
||||||
|
for (key, value) in &existing {
|
||||||
|
let key_str = String::from_utf8_lossy(&key);
|
||||||
|
if key_str.ends_with("/desired-system") {
|
||||||
|
let Ok(desired) = serde_json::from_slice::<DesiredSystemSpec>(value) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
if desired.deployment_id.as_deref() == Some(deployment_name.as_str()) {
|
||||||
|
client.delete(&key).await?;
|
||||||
|
cleared_nodes.push(desired.node_id.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (key, value) in existing {
|
||||||
|
let key_str = String::from_utf8_lossy(&key);
|
||||||
|
if key_str.ends_with("/desired-system") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let node_suffix = key_str
|
||||||
|
.strip_prefix(&node_prefix)
|
||||||
|
.filter(|suffix| !suffix.contains('/'));
|
||||||
|
let Some(node_id) = node_suffix else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let mut node = match serde_json::from_slice::<ClusterNodeRecord>(&value) {
|
||||||
|
Ok(node) => node,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if cleared_nodes.iter().any(|cleared| cleared == node_id)
|
||||||
|
&& node.state.as_deref() == Some("draining")
|
||||||
|
{
|
||||||
|
node.state = Some("active".to_string());
|
||||||
|
client.put(&key, &serde_json::to_vec(&node)?).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let status = HostDeploymentStatus {
|
||||||
|
name: deployment_name.clone(),
|
||||||
|
phase: Some("aborted".to_string()),
|
||||||
|
paused: true,
|
||||||
|
paused_by_operator: true,
|
||||||
|
selected_nodes: Vec::new(),
|
||||||
|
completed_nodes: Vec::new(),
|
||||||
|
in_progress_nodes: Vec::new(),
|
||||||
|
failed_nodes: Vec::new(),
|
||||||
|
message: Some(format!(
|
||||||
|
"aborted by operator; cleared desired-system from {} node(s)",
|
||||||
|
cleared_nodes.len()
|
||||||
|
)),
|
||||||
|
updated_at: Some(Utc::now()),
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.put(
|
||||||
|
&key_host_deployment_status(&cluster_namespace, &cluster_id, &deployment_name),
|
||||||
|
&serde_json::to_vec(&status)?,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
println!("{}", serde_json::to_string_pretty(&status)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
async fn prune_cluster_state(
|
async fn prune_cluster_state(
|
||||||
client: &mut Client,
|
client: &mut Client,
|
||||||
cluster_namespace: &str,
|
cluster_namespace: &str,
|
||||||
|
|
@ -762,6 +1271,16 @@ async fn prune_cluster_state(
|
||||||
.to_string(),
|
.to_string(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
for deployment in &spec.host_deployments {
|
||||||
|
desired_keys.insert(
|
||||||
|
String::from_utf8_lossy(&key_host_deployment_spec(
|
||||||
|
cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&deployment.name,
|
||||||
|
))
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
for svc in &spec.services {
|
for svc in &spec.services {
|
||||||
desired_keys.insert(
|
desired_keys.insert(
|
||||||
String::from_utf8_lossy(&key_service(cluster_namespace, cluster_id, &svc.name))
|
String::from_utf8_lossy(&key_service(cluster_namespace, cluster_id, &svc.name))
|
||||||
|
|
@ -893,11 +1412,18 @@ mod tests {
|
||||||
failure_domain: Some("rack-a".to_string()),
|
failure_domain: Some("rack-a".to_string()),
|
||||||
nix_profile: None,
|
nix_profile: None,
|
||||||
install_plan: None,
|
install_plan: None,
|
||||||
|
hardware_facts: None,
|
||||||
desired_system: None,
|
desired_system: None,
|
||||||
state: Some(match NodeState::Pending {
|
state: Some(match NodeState::Pending {
|
||||||
NodeState::Pending => "pending".to_string(),
|
NodeState::Pending => "pending".to_string(),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}),
|
}),
|
||||||
|
commission_state: None,
|
||||||
|
install_state: None,
|
||||||
|
commissioned_at: None,
|
||||||
|
last_inventory_hash: None,
|
||||||
|
power_state: None,
|
||||||
|
bmc_ref: None,
|
||||||
last_heartbeat: None,
|
last_heartbeat: None,
|
||||||
}],
|
}],
|
||||||
node_classes: vec![deployer_types::NodeClassSpec {
|
node_classes: vec![deployer_types::NodeClassSpec {
|
||||||
|
|
@ -922,6 +1448,7 @@ mod tests {
|
||||||
labels: HashMap::from([("env".to_string(), "dev".to_string())]),
|
labels: HashMap::from([("env".to_string(), "dev".to_string())]),
|
||||||
}],
|
}],
|
||||||
enrollment_rules: vec![],
|
enrollment_rules: vec![],
|
||||||
|
host_deployments: vec![],
|
||||||
services: vec![],
|
services: vec![],
|
||||||
instances: vec![],
|
instances: vec![],
|
||||||
mtls_policies: vec![],
|
mtls_policies: vec![],
|
||||||
|
|
@ -983,11 +1510,13 @@ mod tests {
|
||||||
let mut spec = test_spec();
|
let mut spec = test_spec();
|
||||||
spec.nodes[0].desired_system = Some(DesiredSystemSpec {
|
spec.nodes[0].desired_system = Some(DesiredSystemSpec {
|
||||||
node_id: String::new(),
|
node_id: String::new(),
|
||||||
|
deployment_id: None,
|
||||||
nixos_configuration: Some("node01-next".to_string()),
|
nixos_configuration: Some("node01-next".to_string()),
|
||||||
flake_ref: Some("github:centra/cloud".to_string()),
|
flake_ref: Some("github:centra/cloud".to_string()),
|
||||||
switch_action: Some("boot".to_string()),
|
switch_action: Some("boot".to_string()),
|
||||||
health_check_command: vec!["true".to_string()],
|
health_check_command: vec!["true".to_string()],
|
||||||
rollback_on_failure: Some(false),
|
rollback_on_failure: Some(false),
|
||||||
|
drain_before_apply: Some(false),
|
||||||
});
|
});
|
||||||
|
|
||||||
let resolved = resolve_nodes(&spec).unwrap();
|
let resolved = resolve_nodes(&spec).unwrap();
|
||||||
|
|
@ -1012,6 +1541,14 @@ mod tests {
|
||||||
&format!("{}nodes/node01/observed-system", prefix),
|
&format!("{}nodes/node01/observed-system", prefix),
|
||||||
&prefix
|
&prefix
|
||||||
));
|
));
|
||||||
|
assert!(is_prunable_key(
|
||||||
|
&format!("{}deployments/hosts/worker-rollout/spec", prefix),
|
||||||
|
&prefix
|
||||||
|
));
|
||||||
|
assert!(!is_prunable_key(
|
||||||
|
&format!("{}deployments/hosts/worker-rollout/status", prefix),
|
||||||
|
&prefix
|
||||||
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1028,6 +1565,7 @@ fn is_prunable_key(key: &str, prefix: &str) -> bool {
|
||||||
key.starts_with(&format!("{}node-classes/", prefix))
|
key.starts_with(&format!("{}node-classes/", prefix))
|
||||||
|| key.starts_with(&format!("{}pools/", prefix))
|
|| key.starts_with(&format!("{}pools/", prefix))
|
||||||
|| key.starts_with(&format!("{}enrollment-rules/", prefix))
|
|| key.starts_with(&format!("{}enrollment-rules/", prefix))
|
||||||
|
|| key.starts_with(&format!("{}deployments/hosts/", prefix)) && key.ends_with("/spec")
|
||||||
|| key.starts_with(&format!("{}services/", prefix))
|
|| key.starts_with(&format!("{}services/", prefix))
|
||||||
|| key.starts_with(&format!("{}instances/", prefix))
|
|| key.starts_with(&format!("{}instances/", prefix))
|
||||||
|| key.starts_with(&format!("{}mtls/policies/", prefix))
|
|| key.starts_with(&format!("{}mtls/policies/", prefix))
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ use clap::{Parser, Subcommand, ValueEnum};
|
||||||
use tracing_subscriber::EnvFilter;
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
mod chainfire;
|
mod chainfire;
|
||||||
|
mod power;
|
||||||
mod remote;
|
mod remote;
|
||||||
|
|
||||||
/// Deployer control CLI for PhotonCloud.
|
/// Deployer control CLI for PhotonCloud.
|
||||||
|
|
@ -82,6 +83,132 @@ enum Command {
|
||||||
#[arg(long, default_value = "status")]
|
#[arg(long, default_value = "status")]
|
||||||
action: String,
|
action: String,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// ノード単位の inventory / lifecycle 状態を確認・更新する
|
||||||
|
Node {
|
||||||
|
#[command(subcommand)]
|
||||||
|
command: NodeCommand,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// HostDeployment rollout object を確認・操作する
|
||||||
|
Deployment {
|
||||||
|
#[command(subcommand)]
|
||||||
|
command: DeploymentCommand,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Subcommand, Debug)]
|
||||||
|
enum NodeCommand {
|
||||||
|
/// 指定ノードの記録と関連 state を表示する
|
||||||
|
Inspect {
|
||||||
|
#[arg(long)]
|
||||||
|
node_id: String,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = false)]
|
||||||
|
include_desired_system: bool,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = false)]
|
||||||
|
include_observed_system: bool,
|
||||||
|
|
||||||
|
#[arg(long, value_enum, default_value_t = DumpFormat::Json)]
|
||||||
|
format: DumpFormat,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// 指定ノードの lifecycle / commissioning 状態を更新する
|
||||||
|
SetState {
|
||||||
|
#[arg(long)]
|
||||||
|
node_id: String,
|
||||||
|
|
||||||
|
#[arg(long, value_enum)]
|
||||||
|
state: Option<NodeLifecycleStateArg>,
|
||||||
|
|
||||||
|
#[arg(long, value_enum)]
|
||||||
|
commission_state: Option<CommissionStateArg>,
|
||||||
|
|
||||||
|
#[arg(long, value_enum)]
|
||||||
|
install_state: Option<InstallStateArg>,
|
||||||
|
|
||||||
|
#[arg(long, value_enum)]
|
||||||
|
power_state: Option<PowerStateArg>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
bmc_ref: Option<String>,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// 指定ノードの observed-system を更新する
|
||||||
|
SetObserved {
|
||||||
|
#[arg(long)]
|
||||||
|
node_id: String,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
status: Option<String>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
nixos_configuration: Option<String>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
target_system: Option<String>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
current_system: Option<String>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
configured_system: Option<String>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
booted_system: Option<String>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
rollback_system: Option<String>,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// 指定ノードの電源操作を行う
|
||||||
|
Power {
|
||||||
|
#[arg(long)]
|
||||||
|
node_id: String,
|
||||||
|
|
||||||
|
#[arg(long, value_enum)]
|
||||||
|
action: PowerActionArg,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// 指定ノードに再インストールを要求する
|
||||||
|
Reinstall {
|
||||||
|
#[arg(long)]
|
||||||
|
node_id: String,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = false)]
|
||||||
|
power_cycle: bool,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Subcommand, Debug)]
|
||||||
|
enum DeploymentCommand {
|
||||||
|
/// HostDeployment の spec/status を表示する
|
||||||
|
Inspect {
|
||||||
|
#[arg(long)]
|
||||||
|
name: String,
|
||||||
|
|
||||||
|
#[arg(long, value_enum, default_value_t = DumpFormat::Json)]
|
||||||
|
format: DumpFormat,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// HostDeployment を一時停止する
|
||||||
|
Pause {
|
||||||
|
#[arg(long)]
|
||||||
|
name: String,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// HostDeployment を再開する
|
||||||
|
Resume {
|
||||||
|
#[arg(long)]
|
||||||
|
name: String,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// HostDeployment を中止し、配布済み desired-system を取り消す
|
||||||
|
Abort {
|
||||||
|
#[arg(long)]
|
||||||
|
name: String,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, ValueEnum)]
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
|
|
@ -90,6 +217,103 @@ enum DumpFormat {
|
||||||
Json,
|
Json,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
|
enum NodeLifecycleStateArg {
|
||||||
|
Pending,
|
||||||
|
Provisioning,
|
||||||
|
Active,
|
||||||
|
Failed,
|
||||||
|
Draining,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NodeLifecycleStateArg {
|
||||||
|
fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Pending => "pending",
|
||||||
|
Self::Provisioning => "provisioning",
|
||||||
|
Self::Active => "active",
|
||||||
|
Self::Failed => "failed",
|
||||||
|
Self::Draining => "draining",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
|
enum CommissionStateArg {
|
||||||
|
Discovered,
|
||||||
|
Commissioning,
|
||||||
|
Commissioned,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CommissionStateArg {
|
||||||
|
fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Discovered => "discovered",
|
||||||
|
Self::Commissioning => "commissioning",
|
||||||
|
Self::Commissioned => "commissioned",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
|
enum InstallStateArg {
|
||||||
|
Pending,
|
||||||
|
Installing,
|
||||||
|
Installed,
|
||||||
|
Failed,
|
||||||
|
ReinstallRequested,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InstallStateArg {
|
||||||
|
fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Pending => "pending",
|
||||||
|
Self::Installing => "installing",
|
||||||
|
Self::Installed => "installed",
|
||||||
|
Self::Failed => "failed",
|
||||||
|
Self::ReinstallRequested => "reinstall_requested",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
|
enum PowerStateArg {
|
||||||
|
On,
|
||||||
|
Off,
|
||||||
|
Cycling,
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PowerStateArg {
|
||||||
|
fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::On => "on",
|
||||||
|
Self::Off => "off",
|
||||||
|
Self::Cycling => "cycling",
|
||||||
|
Self::Unknown => "unknown",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
|
enum PowerActionArg {
|
||||||
|
On,
|
||||||
|
Off,
|
||||||
|
Cycle,
|
||||||
|
Refresh,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PowerActionArg {
|
||||||
|
fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::On => "on",
|
||||||
|
Self::Off => "off",
|
||||||
|
Self::Cycle => "cycle",
|
||||||
|
Self::Refresh => "refresh",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
let env_filter =
|
let env_filter =
|
||||||
|
|
@ -139,6 +363,149 @@ async fn main() -> Result<()> {
|
||||||
Command::Deployer { endpoint, action } => {
|
Command::Deployer { endpoint, action } => {
|
||||||
remote::run_deployer_command(&endpoint, &action).await?;
|
remote::run_deployer_command(&endpoint, &action).await?;
|
||||||
}
|
}
|
||||||
|
Command::Node { command } => {
|
||||||
|
let cluster_id = cli
|
||||||
|
.cluster_id
|
||||||
|
.as_deref()
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("--cluster-id is required for node commands"))?;
|
||||||
|
|
||||||
|
match command {
|
||||||
|
NodeCommand::Inspect {
|
||||||
|
node_id,
|
||||||
|
include_desired_system,
|
||||||
|
include_observed_system,
|
||||||
|
format,
|
||||||
|
} => {
|
||||||
|
chainfire::inspect_node(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&node_id,
|
||||||
|
include_desired_system,
|
||||||
|
include_observed_system,
|
||||||
|
matches!(format, DumpFormat::Json),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
NodeCommand::SetState {
|
||||||
|
node_id,
|
||||||
|
state,
|
||||||
|
commission_state,
|
||||||
|
install_state,
|
||||||
|
power_state,
|
||||||
|
bmc_ref,
|
||||||
|
} => {
|
||||||
|
chainfire::set_node_states(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&node_id,
|
||||||
|
state.map(|value| value.as_str().to_string()),
|
||||||
|
commission_state.map(|value| value.as_str().to_string()),
|
||||||
|
install_state.map(|value| value.as_str().to_string()),
|
||||||
|
power_state.map(|value| value.as_str().to_string()),
|
||||||
|
bmc_ref,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
NodeCommand::SetObserved {
|
||||||
|
node_id,
|
||||||
|
status,
|
||||||
|
nixos_configuration,
|
||||||
|
target_system,
|
||||||
|
current_system,
|
||||||
|
configured_system,
|
||||||
|
booted_system,
|
||||||
|
rollback_system,
|
||||||
|
} => {
|
||||||
|
chainfire::set_observed_system(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&node_id,
|
||||||
|
status,
|
||||||
|
nixos_configuration,
|
||||||
|
target_system,
|
||||||
|
current_system,
|
||||||
|
configured_system,
|
||||||
|
booted_system,
|
||||||
|
rollback_system,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
NodeCommand::Power { node_id, action } => {
|
||||||
|
power::power_node(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&node_id,
|
||||||
|
action.as_str(),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
NodeCommand::Reinstall {
|
||||||
|
node_id,
|
||||||
|
power_cycle,
|
||||||
|
} => {
|
||||||
|
power::request_reinstall(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&node_id,
|
||||||
|
power_cycle,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Command::Deployment { command } => {
|
||||||
|
let cluster_id = cli
|
||||||
|
.cluster_id
|
||||||
|
.as_deref()
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("--cluster-id is required for deployment commands"))?;
|
||||||
|
|
||||||
|
match command {
|
||||||
|
DeploymentCommand::Inspect { name, format } => {
|
||||||
|
chainfire::inspect_host_deployment(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&name,
|
||||||
|
matches!(format, DumpFormat::Json),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
DeploymentCommand::Pause { name } => {
|
||||||
|
chainfire::set_host_deployment_paused(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&name,
|
||||||
|
true,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
DeploymentCommand::Resume { name } => {
|
||||||
|
chainfire::set_host_deployment_paused(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&name,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
DeploymentCommand::Abort { name } => {
|
||||||
|
chainfire::abort_host_deployment(
|
||||||
|
&cli.chainfire_endpoint,
|
||||||
|
&cli.cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&name,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
||||||
372
deployer/crates/deployer-ctl/src/power.rs
Normal file
372
deployer/crates/deployer-ctl/src/power.rs
Normal file
|
|
@ -0,0 +1,372 @@
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use chainfire_client::Client;
|
||||||
|
use deployer_types::{ClusterNodeRecord, InstallState, PowerState};
|
||||||
|
use reqwest::{Client as HttpClient, Url};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
fn cluster_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
|
format!("{}/clusters/{}/", cluster_namespace, cluster_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_node(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}nodes/{}",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
node_id
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_desired_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}nodes/{}/desired-system",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
node_id
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_observed_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}nodes/{}/observed-system",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
node_id
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn chainfire_endpoints(raw: &str) -> Vec<String> {
|
||||||
|
raw.split(',')
|
||||||
|
.map(str::trim)
|
||||||
|
.filter(|endpoint| !endpoint.is_empty())
|
||||||
|
.map(ToOwned::to_owned)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
enum PowerAction {
|
||||||
|
On,
|
||||||
|
Off,
|
||||||
|
Cycle,
|
||||||
|
Refresh,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PowerAction {
|
||||||
|
fn parse(value: &str) -> Result<Self> {
|
||||||
|
match value {
|
||||||
|
"on" => Ok(Self::On),
|
||||||
|
"off" => Ok(Self::Off),
|
||||||
|
"cycle" => Ok(Self::Cycle),
|
||||||
|
"refresh" => Ok(Self::Refresh),
|
||||||
|
other => Err(anyhow::anyhow!("unsupported power action {}", other)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reset_type(self) -> Option<&'static str> {
|
||||||
|
match self {
|
||||||
|
Self::On => Some("On"),
|
||||||
|
Self::Off => Some("ForceOff"),
|
||||||
|
Self::Cycle => Some("PowerCycle"),
|
||||||
|
Self::Refresh => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct RedfishTarget {
|
||||||
|
resource_url: Url,
|
||||||
|
username: Option<String>,
|
||||||
|
password: Option<String>,
|
||||||
|
insecure: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct RedfishSystemView {
|
||||||
|
#[serde(rename = "PowerState")]
|
||||||
|
power_state: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RedfishTarget {
|
||||||
|
fn parse(reference: &str) -> Result<Self> {
|
||||||
|
let rewritten = if let Some(rest) = reference.strip_prefix("redfish+http://") {
|
||||||
|
format!("http://{rest}")
|
||||||
|
} else if let Some(rest) = reference.strip_prefix("redfish+https://") {
|
||||||
|
format!("https://{rest}")
|
||||||
|
} else if let Some(rest) = reference.strip_prefix("redfish://") {
|
||||||
|
format!("https://{rest}")
|
||||||
|
} else {
|
||||||
|
return Err(anyhow::anyhow!(
|
||||||
|
"unsupported BMC reference {}; expected redfish:// or redfish+http(s)://",
|
||||||
|
reference
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut resource_url = Url::parse(&rewritten)
|
||||||
|
.with_context(|| format!("failed to parse BMC reference {}", reference))?;
|
||||||
|
let insecure = resource_url
|
||||||
|
.query_pairs()
|
||||||
|
.any(|(key, value)| key == "insecure" && (value == "1" || value == "true"));
|
||||||
|
let username = if resource_url.username().is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(resource_url.username().to_string())
|
||||||
|
};
|
||||||
|
let password = resource_url.password().map(ToOwned::to_owned);
|
||||||
|
let system_path = normalize_redfish_system_path(resource_url.path());
|
||||||
|
resource_url
|
||||||
|
.set_username("")
|
||||||
|
.map_err(|_| anyhow::anyhow!("failed to clear username from BMC reference"))?;
|
||||||
|
resource_url
|
||||||
|
.set_password(None)
|
||||||
|
.map_err(|_| anyhow::anyhow!("failed to clear password from BMC reference"))?;
|
||||||
|
resource_url.set_query(None);
|
||||||
|
resource_url.set_path(&system_path);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
resource_url,
|
||||||
|
username,
|
||||||
|
password,
|
||||||
|
insecure,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn action_url(&self) -> Result<Url> {
|
||||||
|
let mut action_url = self.resource_url.clone();
|
||||||
|
let path = format!(
|
||||||
|
"{}/Actions/ComputerSystem.Reset",
|
||||||
|
self.resource_url.path().trim_end_matches('/')
|
||||||
|
);
|
||||||
|
action_url.set_path(&path);
|
||||||
|
Ok(action_url)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn perform(&self, action: PowerAction) -> Result<PowerState> {
|
||||||
|
let client = HttpClient::builder()
|
||||||
|
.danger_accept_invalid_certs(self.insecure)
|
||||||
|
.build()
|
||||||
|
.context("failed to create Redfish client")?;
|
||||||
|
|
||||||
|
if let Some(reset_type) = action.reset_type() {
|
||||||
|
let request = self
|
||||||
|
.with_auth(client.post(self.action_url()?))
|
||||||
|
.json(&json!({ "ResetType": reset_type }));
|
||||||
|
request
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.context("failed to send Redfish reset request")?
|
||||||
|
.error_for_status()
|
||||||
|
.context("Redfish reset request failed")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
match action {
|
||||||
|
PowerAction::Cycle => Ok(PowerState::Cycling),
|
||||||
|
PowerAction::On | PowerAction::Off | PowerAction::Refresh => self.refresh(&client).await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn refresh(&self, client: &HttpClient) -> Result<PowerState> {
|
||||||
|
let response = self
|
||||||
|
.with_auth(client.get(self.resource_url.clone()))
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.context("failed to query Redfish system resource")?
|
||||||
|
.error_for_status()
|
||||||
|
.context("Redfish system query failed")?;
|
||||||
|
let system: RedfishSystemView = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.context("failed to decode Redfish system response")?;
|
||||||
|
map_redfish_power_state(system.power_state.as_deref())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn with_auth(&self, request: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
|
||||||
|
match self.username.as_deref() {
|
||||||
|
Some(username) => request.basic_auth(username, self.password.clone()),
|
||||||
|
None => request,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn normalize_redfish_system_path(path: &str) -> String {
|
||||||
|
let trimmed = path.trim();
|
||||||
|
if trimmed.is_empty() || trimmed == "/" {
|
||||||
|
return "/redfish/v1/Systems/System.Embedded.1".to_string();
|
||||||
|
}
|
||||||
|
if trimmed.starts_with("/redfish/") {
|
||||||
|
return trimmed.to_string();
|
||||||
|
}
|
||||||
|
format!("/redfish/v1/Systems/{}", trimmed.trim_start_matches('/'))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn map_redfish_power_state(value: Option<&str>) -> Result<PowerState> {
|
||||||
|
match value.unwrap_or("Unknown").to_ascii_lowercase().as_str() {
|
||||||
|
"on" => Ok(PowerState::On),
|
||||||
|
"off" => Ok(PowerState::Off),
|
||||||
|
"poweringon" | "poweringoff" | "cycling" => Ok(PowerState::Cycling),
|
||||||
|
"unknown" => Ok(PowerState::Unknown),
|
||||||
|
other => Err(anyhow::anyhow!("unsupported Redfish power state {}", other)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_node_record(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
node_id: &str,
|
||||||
|
) -> Result<(Client, ClusterNodeRecord, Vec<u8>)> {
|
||||||
|
let endpoints = chainfire_endpoints(endpoint);
|
||||||
|
let mut last_error = None;
|
||||||
|
|
||||||
|
for endpoint in endpoints {
|
||||||
|
match Client::connect(endpoint.clone()).await {
|
||||||
|
Ok(mut client) => {
|
||||||
|
let key = key_node(cluster_namespace, cluster_id, node_id);
|
||||||
|
let Some(bytes) = client.get(&key).await? else {
|
||||||
|
return Err(anyhow::anyhow!("node {} not found", node_id));
|
||||||
|
};
|
||||||
|
let node = serde_json::from_slice::<ClusterNodeRecord>(&bytes)
|
||||||
|
.context("failed to decode node record")?;
|
||||||
|
return Ok((client, node, key));
|
||||||
|
}
|
||||||
|
Err(error) => last_error = Some(anyhow::Error::new(error)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(last_error.unwrap_or_else(|| anyhow::anyhow!("no Chainfire endpoints configured")))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn power_node(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
node_id: &str,
|
||||||
|
action: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
let action = PowerAction::parse(action)?;
|
||||||
|
let (mut client, mut node, key) =
|
||||||
|
load_node_record(endpoint, cluster_namespace, cluster_id, node_id).await?;
|
||||||
|
let bmc_ref = node
|
||||||
|
.bmc_ref
|
||||||
|
.clone()
|
||||||
|
.with_context(|| format!("node {} does not have a bmc_ref", node_id))?;
|
||||||
|
let target = RedfishTarget::parse(&bmc_ref)?;
|
||||||
|
let power_state = target.perform(action).await?;
|
||||||
|
|
||||||
|
node.power_state = Some(power_state);
|
||||||
|
client.put(&key, &serde_json::to_vec(&node)?).await?;
|
||||||
|
println!("{}", serde_json::to_string_pretty(&node)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn request_reinstall(
|
||||||
|
endpoint: &str,
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
node_id: &str,
|
||||||
|
power_cycle: bool,
|
||||||
|
) -> Result<()> {
|
||||||
|
let (mut client, mut node, key) =
|
||||||
|
load_node_record(endpoint, cluster_namespace, cluster_id, node_id).await?;
|
||||||
|
|
||||||
|
node.state = Some("provisioning".to_string());
|
||||||
|
node.install_state = Some(InstallState::ReinstallRequested);
|
||||||
|
|
||||||
|
if power_cycle {
|
||||||
|
let bmc_ref = node
|
||||||
|
.bmc_ref
|
||||||
|
.clone()
|
||||||
|
.with_context(|| format!("node {} does not have a bmc_ref", node_id))?;
|
||||||
|
let target = RedfishTarget::parse(&bmc_ref)?;
|
||||||
|
node.power_state = Some(target.perform(PowerAction::Cycle).await?);
|
||||||
|
}
|
||||||
|
|
||||||
|
client.put(&key, &serde_json::to_vec(&node)?).await?;
|
||||||
|
client
|
||||||
|
.delete(&key_desired_system(cluster_namespace, cluster_id, node_id))
|
||||||
|
.await?;
|
||||||
|
client
|
||||||
|
.delete(&key_observed_system(cluster_namespace, cluster_id, node_id))
|
||||||
|
.await?;
|
||||||
|
println!("{}", serde_json::to_string_pretty(&node)?);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use axum::{extract::State, http::StatusCode, routing::{get, post}, Json, Router};
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
use tokio::net::TcpListener;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_redfish_short_reference_defaults_to_https() {
|
||||||
|
let parsed = RedfishTarget::parse("redfish://lab-bmc/node01").unwrap();
|
||||||
|
assert_eq!(parsed.resource_url.as_str(), "https://lab-bmc/redfish/v1/Systems/node01");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_redfish_explicit_http_reference_keeps_query_flags_local() {
|
||||||
|
let parsed =
|
||||||
|
RedfishTarget::parse("redfish+http://user:pass@127.0.0.1/system-1?insecure=1").unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
parsed.resource_url.as_str(),
|
||||||
|
"http://127.0.0.1/redfish/v1/Systems/system-1"
|
||||||
|
);
|
||||||
|
assert_eq!(parsed.username.as_deref(), Some("user"));
|
||||||
|
assert_eq!(parsed.password.as_deref(), Some("pass"));
|
||||||
|
assert!(parsed.insecure);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn redfish_adapter_refreshes_and_resets_power() {
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
struct TestState {
|
||||||
|
seen_payloads: Arc<Mutex<Vec<String>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn system_handler() -> Json<Value> {
|
||||||
|
Json(json!({ "PowerState": "On" }))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn reset_handler(
|
||||||
|
State(state): State<TestState>,
|
||||||
|
Json(payload): Json<Value>,
|
||||||
|
) -> StatusCode {
|
||||||
|
state
|
||||||
|
.seen_payloads
|
||||||
|
.lock()
|
||||||
|
.unwrap()
|
||||||
|
.push(payload.to_string());
|
||||||
|
StatusCode::NO_CONTENT
|
||||||
|
}
|
||||||
|
|
||||||
|
let state = TestState::default();
|
||||||
|
let app = Router::new()
|
||||||
|
.route("/redfish/v1/Systems/node01", get(system_handler))
|
||||||
|
.route(
|
||||||
|
"/redfish/v1/Systems/node01/Actions/ComputerSystem.Reset",
|
||||||
|
post(reset_handler),
|
||||||
|
)
|
||||||
|
.with_state(state.clone());
|
||||||
|
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||||
|
let addr = listener.local_addr().unwrap();
|
||||||
|
let server = tokio::spawn(async move {
|
||||||
|
axum::serve(listener, app).await.unwrap();
|
||||||
|
});
|
||||||
|
|
||||||
|
let target = RedfishTarget::parse(&format!(
|
||||||
|
"redfish+http://{}/redfish/v1/Systems/node01",
|
||||||
|
addr
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(target.perform(PowerAction::Refresh).await.unwrap(), PowerState::On);
|
||||||
|
assert_eq!(target.perform(PowerAction::Off).await.unwrap(), PowerState::On);
|
||||||
|
|
||||||
|
let payloads = state.seen_payloads.lock().unwrap().clone();
|
||||||
|
assert_eq!(payloads, vec![r#"{"ResetType":"ForceOff"}"#.to_string()]);
|
||||||
|
|
||||||
|
server.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -29,6 +29,7 @@ tracing-subscriber = { workspace = true }
|
||||||
chrono = { workspace = true }
|
chrono = { workspace = true }
|
||||||
rcgen = { workspace = true }
|
rcgen = { workspace = true }
|
||||||
clap = { workspace = true }
|
clap = { workspace = true }
|
||||||
|
sha2 = "0.10"
|
||||||
|
|
||||||
# ChainFire for state management
|
# ChainFire for state management
|
||||||
chainfire-client = { workspace = true }
|
chainfire-client = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,11 @@
|
||||||
use axum::{extract::State, http::HeaderMap, http::StatusCode, Json};
|
use axum::{extract::State, http::HeaderMap, http::StatusCode, Json};
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use deployer_types::{
|
use deployer_types::{
|
||||||
EnrollmentRuleSpec, HardwareFacts, InstallPlan, NodeClassSpec, NodeConfig, NodeInfo,
|
CommissionState, EnrollmentRuleSpec, HardwareFacts, InstallPlan, InstallState,
|
||||||
NodePoolSpec, NodeState, PhoneHomeRequest, PhoneHomeResponse,
|
NodeClassSpec, NodeConfig, NodeInfo, NodePoolSpec, NodeState, PhoneHomeRequest,
|
||||||
|
PhoneHomeResponse, PowerState,
|
||||||
};
|
};
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info, warn};
|
||||||
|
|
||||||
|
|
@ -49,6 +51,14 @@ fn merge_hardware_summary_metadata(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn inventory_hash(hardware_facts: Option<&HardwareFacts>) -> Option<String> {
|
||||||
|
let hardware_facts = hardware_facts?;
|
||||||
|
let payload = serde_json::to_vec(hardware_facts).ok()?;
|
||||||
|
let mut hasher = Sha256::new();
|
||||||
|
hasher.update(payload);
|
||||||
|
Some(format!("{:x}", hasher.finalize()))
|
||||||
|
}
|
||||||
|
|
||||||
/// POST /api/v1/phone-home
|
/// POST /api/v1/phone-home
|
||||||
///
|
///
|
||||||
/// Handles node registration during first boot.
|
/// Handles node registration during first boot.
|
||||||
|
|
@ -794,6 +804,21 @@ async fn store_cluster_node_if_configured(
|
||||||
install_plan: node_config.install_plan.clone(),
|
install_plan: node_config.install_plan.clone(),
|
||||||
hardware_facts: hardware_facts.cloned(),
|
hardware_facts: hardware_facts.cloned(),
|
||||||
state: Some(format!("{:?}", node_info.state).to_lowercase()),
|
state: Some(format!("{:?}", node_info.state).to_lowercase()),
|
||||||
|
commission_state: hardware_facts.map(|_| CommissionState::Discovered),
|
||||||
|
install_state: node_config.install_plan.as_ref().map(|_| InstallState::Pending),
|
||||||
|
commissioned_at: None,
|
||||||
|
last_inventory_hash: inventory_hash(hardware_facts),
|
||||||
|
power_state: node_info
|
||||||
|
.metadata
|
||||||
|
.get("power_state")
|
||||||
|
.and_then(|value| match value.as_str() {
|
||||||
|
"on" => Some(PowerState::On),
|
||||||
|
"off" => Some(PowerState::Off),
|
||||||
|
"cycling" => Some(PowerState::Cycling),
|
||||||
|
"unknown" => Some(PowerState::Unknown),
|
||||||
|
_ => None,
|
||||||
|
}),
|
||||||
|
bmc_ref: node_info.metadata.get("bmc_ref").cloned(),
|
||||||
last_heartbeat: Some(node_info.last_heartbeat),
|
last_heartbeat: Some(node_info.last_heartbeat),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,62 @@ impl Default for NodeState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Commissioning lifecycle for inventory-driven bare-metal onboarding.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum CommissionState {
|
||||||
|
/// Node has been discovered and reported inventory but not yet approved.
|
||||||
|
Discovered,
|
||||||
|
/// Manual or automated commissioning is actively validating the node.
|
||||||
|
Commissioning,
|
||||||
|
/// Inventory has been accepted and the node can be installed or rolled out.
|
||||||
|
Commissioned,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for CommissionState {
|
||||||
|
fn default() -> Self {
|
||||||
|
CommissionState::Discovered
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Installation lifecycle for host provisioning and reprovisioning.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum InstallState {
|
||||||
|
/// No install is currently running, but an install may be planned.
|
||||||
|
Pending,
|
||||||
|
/// Bootstrap or reinstall is actively writing the target system.
|
||||||
|
Installing,
|
||||||
|
/// The desired system has been installed successfully.
|
||||||
|
Installed,
|
||||||
|
/// Installation failed and needs operator or controller intervention.
|
||||||
|
Failed,
|
||||||
|
/// A reinstall has been requested but not started yet.
|
||||||
|
ReinstallRequested,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for InstallState {
|
||||||
|
fn default() -> Self {
|
||||||
|
InstallState::Pending
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Best-effort power state tracked by external management adapters.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum PowerState {
|
||||||
|
On,
|
||||||
|
Off,
|
||||||
|
Cycling,
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for PowerState {
|
||||||
|
fn default() -> Self {
|
||||||
|
PowerState::Unknown
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Node information tracked by Deployer
|
/// Node information tracked by Deployer
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct NodeInfo {
|
pub struct NodeInfo {
|
||||||
|
|
@ -492,6 +548,18 @@ pub struct ClusterNodeRecord {
|
||||||
pub hardware_facts: Option<HardwareFacts>,
|
pub hardware_facts: Option<HardwareFacts>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub state: Option<String>,
|
pub state: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub commission_state: Option<CommissionState>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub install_state: Option<InstallState>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub commissioned_at: Option<DateTime<Utc>>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub last_inventory_hash: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub power_state: Option<PowerState>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub bmc_ref: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub last_heartbeat: Option<DateTime<Utc>>,
|
pub last_heartbeat: Option<DateTime<Utc>>,
|
||||||
}
|
}
|
||||||
|
|
@ -534,6 +602,8 @@ pub struct DesiredSystemSpec {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub node_id: String,
|
pub node_id: String,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub deployment_id: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub nixos_configuration: Option<String>,
|
pub nixos_configuration: Option<String>,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub flake_ref: Option<String>,
|
pub flake_ref: Option<String>,
|
||||||
|
|
@ -543,6 +613,8 @@ pub struct DesiredSystemSpec {
|
||||||
pub health_check_command: Vec<String>,
|
pub health_check_command: Vec<String>,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub rollback_on_failure: Option<bool>,
|
pub rollback_on_failure: Option<bool>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub drain_before_apply: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cluster metadata (PhotonCloud scope).
|
/// Cluster metadata (PhotonCloud scope).
|
||||||
|
|
@ -576,9 +648,23 @@ pub struct NodeSpec {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub install_plan: Option<InstallPlan>,
|
pub install_plan: Option<InstallPlan>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
pub hardware_facts: Option<HardwareFacts>,
|
||||||
|
#[serde(default)]
|
||||||
pub desired_system: Option<DesiredSystemSpec>,
|
pub desired_system: Option<DesiredSystemSpec>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub state: Option<String>,
|
pub state: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub commission_state: Option<CommissionState>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub install_state: Option<InstallState>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub commissioned_at: Option<DateTime<Utc>>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub last_inventory_hash: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub power_state: Option<PowerState>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub bmc_ref: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub last_heartbeat: Option<DateTime<Utc>>,
|
pub last_heartbeat: Option<DateTime<Utc>>,
|
||||||
}
|
}
|
||||||
|
|
@ -647,6 +733,74 @@ pub struct EnrollmentRuleSpec {
|
||||||
pub node_id_prefix: Option<String>,
|
pub node_id_prefix: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Selector used by host deployments to target bare-metal nodes declaratively.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||||
|
pub struct HostDeploymentSelector {
|
||||||
|
#[serde(default)]
|
||||||
|
pub node_ids: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub roles: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub pools: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub node_classes: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub match_labels: HashMap<String, String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Declarative rollout intent for host-level NixOS updates.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct HostDeploymentSpec {
|
||||||
|
pub name: String,
|
||||||
|
#[serde(default)]
|
||||||
|
pub selector: HostDeploymentSelector,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub nixos_configuration: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub flake_ref: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub batch_size: Option<u32>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub max_unavailable: Option<u32>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub health_check_command: Vec<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub switch_action: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub rollback_on_failure: Option<bool>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub drain_before_apply: Option<bool>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub reboot_policy: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub paused: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Controller-observed rollout state for a host deployment.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||||
|
pub struct HostDeploymentStatus {
|
||||||
|
#[serde(default)]
|
||||||
|
pub name: String,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub phase: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub paused: bool,
|
||||||
|
#[serde(default)]
|
||||||
|
pub paused_by_operator: bool,
|
||||||
|
#[serde(default)]
|
||||||
|
pub selected_nodes: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub completed_nodes: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub in_progress_nodes: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub failed_nodes: Vec<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub message: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub updated_at: Option<DateTime<Utc>>,
|
||||||
|
}
|
||||||
|
|
||||||
/// Service ports for logical service definitions.
|
/// Service ports for logical service definitions.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
pub struct ServicePorts {
|
pub struct ServicePorts {
|
||||||
|
|
@ -807,6 +961,8 @@ pub struct ClusterStateSpec {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub enrollment_rules: Vec<EnrollmentRuleSpec>,
|
pub enrollment_rules: Vec<EnrollmentRuleSpec>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
pub host_deployments: Vec<HostDeploymentSpec>,
|
||||||
|
#[serde(default)]
|
||||||
pub services: Vec<ServiceSpec>,
|
pub services: Vec<ServiceSpec>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub instances: Vec<ServiceInstanceSpec>,
|
pub instances: Vec<ServiceInstanceSpec>,
|
||||||
|
|
@ -1080,19 +1236,92 @@ mod tests {
|
||||||
fn test_desired_system_spec_roundtrip() {
|
fn test_desired_system_spec_roundtrip() {
|
||||||
let desired = DesiredSystemSpec {
|
let desired = DesiredSystemSpec {
|
||||||
node_id: "node01".to_string(),
|
node_id: "node01".to_string(),
|
||||||
|
deployment_id: Some("worker-rollout".to_string()),
|
||||||
nixos_configuration: Some("node01".to_string()),
|
nixos_configuration: Some("node01".to_string()),
|
||||||
flake_ref: Some("/opt/plasmacloud-src".to_string()),
|
flake_ref: Some("/opt/plasmacloud-src".to_string()),
|
||||||
switch_action: Some("switch".to_string()),
|
switch_action: Some("switch".to_string()),
|
||||||
health_check_command: vec!["systemctl".to_string(), "is-system-running".to_string()],
|
health_check_command: vec!["systemctl".to_string(), "is-system-running".to_string()],
|
||||||
rollback_on_failure: Some(true),
|
rollback_on_failure: Some(true),
|
||||||
|
drain_before_apply: Some(true),
|
||||||
};
|
};
|
||||||
|
|
||||||
let json = serde_json::to_string(&desired).unwrap();
|
let json = serde_json::to_string(&desired).unwrap();
|
||||||
let decoded: DesiredSystemSpec = serde_json::from_str(&json).unwrap();
|
let decoded: DesiredSystemSpec = serde_json::from_str(&json).unwrap();
|
||||||
assert_eq!(decoded.node_id, "node01");
|
assert_eq!(decoded.node_id, "node01");
|
||||||
|
assert_eq!(decoded.deployment_id.as_deref(), Some("worker-rollout"));
|
||||||
assert_eq!(decoded.nixos_configuration.as_deref(), Some("node01"));
|
assert_eq!(decoded.nixos_configuration.as_deref(), Some("node01"));
|
||||||
assert_eq!(decoded.health_check_command.len(), 2);
|
assert_eq!(decoded.health_check_command.len(), 2);
|
||||||
assert_eq!(decoded.rollback_on_failure, Some(true));
|
assert_eq!(decoded.rollback_on_failure, Some(true));
|
||||||
|
assert_eq!(decoded.drain_before_apply, Some(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_host_deployment_roundtrip() {
|
||||||
|
let spec = HostDeploymentSpec {
|
||||||
|
name: "worker-rollout".to_string(),
|
||||||
|
selector: HostDeploymentSelector {
|
||||||
|
node_ids: vec![],
|
||||||
|
roles: vec!["worker".to_string()],
|
||||||
|
pools: vec!["general".to_string()],
|
||||||
|
node_classes: vec!["worker-linux".to_string()],
|
||||||
|
match_labels: HashMap::from([("tier".to_string(), "general".to_string())]),
|
||||||
|
},
|
||||||
|
nixos_configuration: Some("worker-golden".to_string()),
|
||||||
|
flake_ref: Some("/opt/plasmacloud-src".to_string()),
|
||||||
|
batch_size: Some(1),
|
||||||
|
max_unavailable: Some(1),
|
||||||
|
health_check_command: vec!["true".to_string()],
|
||||||
|
switch_action: Some("boot".to_string()),
|
||||||
|
rollback_on_failure: Some(true),
|
||||||
|
drain_before_apply: Some(true),
|
||||||
|
reboot_policy: Some("always".to_string()),
|
||||||
|
paused: Some(false),
|
||||||
|
};
|
||||||
|
|
||||||
|
let json = serde_json::to_string(&spec).unwrap();
|
||||||
|
let decoded: HostDeploymentSpec = serde_json::from_str(&json).unwrap();
|
||||||
|
assert_eq!(decoded.name, "worker-rollout");
|
||||||
|
assert_eq!(decoded.batch_size, Some(1));
|
||||||
|
assert_eq!(decoded.max_unavailable, Some(1));
|
||||||
|
assert_eq!(decoded.selector.roles, vec!["worker".to_string()]);
|
||||||
|
assert_eq!(
|
||||||
|
decoded.selector.match_labels.get("tier").map(String::as_str),
|
||||||
|
Some("general")
|
||||||
|
);
|
||||||
|
assert_eq!(decoded.drain_before_apply, Some(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cluster_node_record_commissioning_roundtrip() {
|
||||||
|
let node = ClusterNodeRecord {
|
||||||
|
node_id: "node01".to_string(),
|
||||||
|
machine_id: Some("machine-01".to_string()),
|
||||||
|
ip: "10.0.0.11".to_string(),
|
||||||
|
hostname: "node01".to_string(),
|
||||||
|
roles: vec!["worker".to_string()],
|
||||||
|
labels: HashMap::new(),
|
||||||
|
pool: Some("general".to_string()),
|
||||||
|
node_class: Some("worker-linux".to_string()),
|
||||||
|
failure_domain: Some("rack-a".to_string()),
|
||||||
|
nix_profile: Some("profiles/worker-linux".to_string()),
|
||||||
|
install_plan: None,
|
||||||
|
hardware_facts: None,
|
||||||
|
state: Some("provisioning".to_string()),
|
||||||
|
commission_state: Some(CommissionState::Commissioned),
|
||||||
|
install_state: Some(InstallState::Installed),
|
||||||
|
commissioned_at: Some(Utc::now()),
|
||||||
|
last_inventory_hash: Some("abc123".to_string()),
|
||||||
|
power_state: Some(PowerState::On),
|
||||||
|
bmc_ref: Some("redfish://lab-rack-a/node01".to_string()),
|
||||||
|
last_heartbeat: Some(Utc::now()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let json = serde_json::to_string(&node).unwrap();
|
||||||
|
let decoded: ClusterNodeRecord = serde_json::from_str(&json).unwrap();
|
||||||
|
assert_eq!(decoded.commission_state, Some(CommissionState::Commissioned));
|
||||||
|
assert_eq!(decoded.install_state, Some(InstallState::Installed));
|
||||||
|
assert_eq!(decoded.power_state, Some(PowerState::On));
|
||||||
|
assert_eq!(decoded.bmc_ref.as_deref(), Some("redfish://lab-rack-a/node01"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -899,6 +899,12 @@ mod tests {
|
||||||
install_plan: None,
|
install_plan: None,
|
||||||
hardware_facts: None,
|
hardware_facts: None,
|
||||||
state: Some("active".to_string()),
|
state: Some("active".to_string()),
|
||||||
|
commission_state: None,
|
||||||
|
install_state: None,
|
||||||
|
commissioned_at: None,
|
||||||
|
last_inventory_hash: None,
|
||||||
|
power_state: None,
|
||||||
|
bmc_ref: None,
|
||||||
last_heartbeat: Some(Utc::now() - ChronoDuration::seconds(10)),
|
last_heartbeat: Some(Utc::now() - ChronoDuration::seconds(10)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ use std::fs;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::process::Stdio;
|
use std::process::Stdio;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use chainfire_client::Client;
|
use chainfire_client::Client;
|
||||||
|
|
@ -135,7 +136,15 @@ impl Agent {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn tick(&self) -> Result<()> {
|
async fn tick(&self) -> Result<()> {
|
||||||
|
info!(
|
||||||
|
endpoint = %self.endpoint,
|
||||||
|
cluster_namespace = %self.cluster_namespace,
|
||||||
|
cluster_id = %self.cluster_id,
|
||||||
|
node_id = %self.node_id,
|
||||||
|
"starting reconciliation tick"
|
||||||
|
);
|
||||||
let mut client = Client::connect(self.endpoint.clone()).await?;
|
let mut client = Client::connect(self.endpoint.clone()).await?;
|
||||||
|
info!("connected to ChainFire");
|
||||||
let node_key = key_node(&self.cluster_namespace, &self.cluster_id, &self.node_id);
|
let node_key = key_node(&self.cluster_namespace, &self.cluster_id, &self.node_id);
|
||||||
let node_raw = client.get_with_revision(&node_key).await?;
|
let node_raw = client.get_with_revision(&node_key).await?;
|
||||||
let Some((node_bytes, _revision)) = node_raw else {
|
let Some((node_bytes, _revision)) = node_raw else {
|
||||||
|
|
@ -149,6 +158,11 @@ impl Agent {
|
||||||
|
|
||||||
let node: ClusterNodeRecord =
|
let node: ClusterNodeRecord =
|
||||||
serde_json::from_slice(&node_bytes).context("failed to parse node record")?;
|
serde_json::from_slice(&node_bytes).context("failed to parse node record")?;
|
||||||
|
info!(
|
||||||
|
hostname = %node.hostname,
|
||||||
|
state = node.state.as_deref().unwrap_or("unknown"),
|
||||||
|
"loaded node record"
|
||||||
|
);
|
||||||
|
|
||||||
let desired = client
|
let desired = client
|
||||||
.get(key_desired_system(
|
.get(key_desired_system(
|
||||||
|
|
@ -160,6 +174,11 @@ impl Agent {
|
||||||
.map(|bytes| serde_json::from_slice::<DesiredSystemSpec>(&bytes))
|
.map(|bytes| serde_json::from_slice::<DesiredSystemSpec>(&bytes))
|
||||||
.transpose()
|
.transpose()
|
||||||
.context("failed to parse desired-system spec")?;
|
.context("failed to parse desired-system spec")?;
|
||||||
|
info!(
|
||||||
|
has_desired_system = desired.is_some(),
|
||||||
|
has_install_plan = node.install_plan.is_some(),
|
||||||
|
"resolved desired-state inputs"
|
||||||
|
);
|
||||||
|
|
||||||
let previous_observed = client
|
let previous_observed = client
|
||||||
.get(key_observed_system(
|
.get(key_observed_system(
|
||||||
|
|
@ -173,24 +192,87 @@ impl Agent {
|
||||||
.context("failed to parse observed-system state")?;
|
.context("failed to parse observed-system state")?;
|
||||||
|
|
||||||
let mut observed = self.base_observed_state(&node);
|
let mut observed = self.base_observed_state(&node);
|
||||||
|
observed.status = Some("planning".to_string());
|
||||||
|
info!(
|
||||||
|
current_system = observed.current_system.as_deref().unwrap_or(""),
|
||||||
|
configured_system = observed.configured_system.as_deref().unwrap_or(""),
|
||||||
|
booted_system = observed.booted_system.as_deref().unwrap_or(""),
|
||||||
|
"publishing planning status"
|
||||||
|
);
|
||||||
|
self.publish_observed_state(&mut client, &observed).await?;
|
||||||
let reconcile_result = self
|
let reconcile_result = self
|
||||||
.reconcile_node(&node, desired.as_ref(), previous_observed.as_ref(), &mut observed)
|
.reconcile_node(
|
||||||
|
&node,
|
||||||
|
desired.as_ref(),
|
||||||
|
previous_observed.as_ref(),
|
||||||
|
&mut observed,
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
if let Err(error) = reconcile_result {
|
if let Err(error) = reconcile_result {
|
||||||
observed.status = Some("failed".to_string());
|
observed.status = Some("failed".to_string());
|
||||||
observed.last_error = Some(error.to_string());
|
observed.last_error = Some(format!("{error:#}"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
status = observed.status.as_deref().unwrap_or("unknown"),
|
||||||
|
"publishing final observed status"
|
||||||
|
);
|
||||||
|
self.publish_observed_state_with_retry(&observed).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn publish_observed_state(
|
||||||
|
&self,
|
||||||
|
client: &mut Client,
|
||||||
|
observed: &ObservedSystemState,
|
||||||
|
) -> Result<()> {
|
||||||
|
info!(
|
||||||
|
status = observed.status.as_deref().unwrap_or("unknown"),
|
||||||
|
"writing observed-system state"
|
||||||
|
);
|
||||||
client
|
client
|
||||||
.put(
|
.put(
|
||||||
&key_observed_system(&self.cluster_namespace, &self.cluster_id, &self.node_id),
|
&key_observed_system(&self.cluster_namespace, &self.cluster_id, &self.node_id),
|
||||||
&serde_json::to_vec(&observed)?,
|
&serde_json::to_vec(observed)?,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn publish_observed_state_with_retry(
|
||||||
|
&self,
|
||||||
|
observed: &ObservedSystemState,
|
||||||
|
) -> Result<()> {
|
||||||
|
let payload = serde_json::to_vec(observed)?;
|
||||||
|
let key = key_observed_system(&self.cluster_namespace, &self.cluster_id, &self.node_id);
|
||||||
|
let deadline = Instant::now() + Duration::from_secs(30);
|
||||||
|
let mut attempt = 1u32;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let result = async {
|
||||||
|
let mut client = Client::connect(self.endpoint.clone()).await?;
|
||||||
|
client.put(&key, &payload).await?;
|
||||||
|
Result::<()>::Ok(())
|
||||||
|
}
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(()) => return Ok(()),
|
||||||
|
Err(error) if Instant::now() < deadline => {
|
||||||
|
warn!(
|
||||||
|
attempt,
|
||||||
|
error = %error,
|
||||||
|
"failed to publish observed-system state; retrying with a fresh connection"
|
||||||
|
);
|
||||||
|
attempt += 1;
|
||||||
|
sleep(Duration::from_secs(2)).await;
|
||||||
|
}
|
||||||
|
Err(error) => return Err(error),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn base_observed_state(&self, node: &ClusterNodeRecord) -> ObservedSystemState {
|
fn base_observed_state(&self, node: &ClusterNodeRecord) -> ObservedSystemState {
|
||||||
ObservedSystemState {
|
ObservedSystemState {
|
||||||
node_id: node.node_id.clone(),
|
node_id: node.node_id.clone(),
|
||||||
|
|
@ -209,7 +291,18 @@ impl Agent {
|
||||||
observed: &mut ObservedSystemState,
|
observed: &mut ObservedSystemState,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
match node.state.as_deref() {
|
match node.state.as_deref() {
|
||||||
Some("failed") | Some("draining") => {
|
Some("failed") => {
|
||||||
|
observed.status = Some("paused".to_string());
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Some("draining")
|
||||||
|
if !desired
|
||||||
|
.map(|spec| {
|
||||||
|
spec.deployment_id.is_some()
|
||||||
|
&& spec.drain_before_apply.unwrap_or(false)
|
||||||
|
})
|
||||||
|
.unwrap_or(false) =>
|
||||||
|
{
|
||||||
observed.status = Some("paused".to_string());
|
observed.status = Some("paused".to_string());
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
@ -227,6 +320,14 @@ impl Agent {
|
||||||
observed.status = Some("idle".to_string());
|
observed.status = Some("idle".to_string());
|
||||||
return Ok(());
|
return Ok(());
|
||||||
};
|
};
|
||||||
|
info!(
|
||||||
|
nixos_configuration = %desired.nixos_configuration,
|
||||||
|
flake_ref = %desired.flake_ref,
|
||||||
|
switch_action = %desired.switch_action,
|
||||||
|
rollback_on_failure = desired.rollback_on_failure,
|
||||||
|
health_check_command = ?desired.health_check_command,
|
||||||
|
"resolved desired system"
|
||||||
|
);
|
||||||
|
|
||||||
observed.nixos_configuration = Some(desired.nixos_configuration.clone());
|
observed.nixos_configuration = Some(desired.nixos_configuration.clone());
|
||||||
observed.flake_root = Some(desired.flake_ref.clone());
|
observed.flake_root = Some(desired.flake_ref.clone());
|
||||||
|
|
@ -236,6 +337,10 @@ impl Agent {
|
||||||
.and_then(|state| state.rollback_system.clone())
|
.and_then(|state| state.rollback_system.clone())
|
||||||
.or_else(|| observed.current_system.clone());
|
.or_else(|| observed.current_system.clone());
|
||||||
observed.rollback_system = previous_system.clone();
|
observed.rollback_system = previous_system.clone();
|
||||||
|
info!(
|
||||||
|
previous_system = previous_system.as_deref().unwrap_or(""),
|
||||||
|
"selected rollback baseline"
|
||||||
|
);
|
||||||
let target_system = self
|
let target_system = self
|
||||||
.build_target_system(&desired.flake_ref, &desired.nixos_configuration)
|
.build_target_system(&desired.flake_ref, &desired.nixos_configuration)
|
||||||
.await
|
.await
|
||||||
|
|
@ -246,8 +351,10 @@ impl Agent {
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
observed.target_system = Some(target_system.clone());
|
observed.target_system = Some(target_system.clone());
|
||||||
|
info!(target_system = %target_system, "built target system");
|
||||||
|
|
||||||
if observed.current_system.as_deref() == Some(target_system.as_str()) {
|
if observed.current_system.as_deref() == Some(target_system.as_str()) {
|
||||||
|
info!("target system already active");
|
||||||
if should_run_post_boot_health_check(previous_observed, &desired, &target_system) {
|
if should_run_post_boot_health_check(previous_observed, &desired, &target_system) {
|
||||||
observed.status = Some("verifying".to_string());
|
observed.status = Some("verifying".to_string());
|
||||||
observed.last_attempt = Some(Utc::now());
|
observed.last_attempt = Some(Utc::now());
|
||||||
|
|
@ -279,8 +386,14 @@ impl Agent {
|
||||||
|
|
||||||
observed.status = Some("reconciling".to_string());
|
observed.status = Some("reconciling".to_string());
|
||||||
observed.last_attempt = Some(Utc::now());
|
observed.last_attempt = Some(Utc::now());
|
||||||
|
info!(
|
||||||
|
target_system = %target_system,
|
||||||
|
switch_action = %desired.switch_action,
|
||||||
|
"switching to target system"
|
||||||
|
);
|
||||||
self.switch_to_target(&target_system, &desired.switch_action)
|
self.switch_to_target(&target_system, &desired.switch_action)
|
||||||
.await?;
|
.await?;
|
||||||
|
info!("switch-to-configuration completed");
|
||||||
|
|
||||||
observed.configured_system = read_symlink_target("/nix/var/nix/profiles/system");
|
observed.configured_system = read_symlink_target("/nix/var/nix/profiles/system");
|
||||||
observed.current_system = read_symlink_target("/run/current-system");
|
observed.current_system = read_symlink_target("/run/current-system");
|
||||||
|
|
@ -327,15 +440,20 @@ impl Agent {
|
||||||
|
|
||||||
async fn build_target_system(&self, flake_ref: &str, configuration: &str) -> Result<String> {
|
async fn build_target_system(&self, flake_ref: &str, configuration: &str) -> Result<String> {
|
||||||
let flake_attr = target_flake_attr(flake_ref, configuration);
|
let flake_attr = target_flake_attr(flake_ref, configuration);
|
||||||
let output = run_command(
|
info!(flake_attr = %flake_attr, "building target system");
|
||||||
"nix",
|
let mut build_args = vec![
|
||||||
&["build", "--no-link", "--print-out-paths", flake_attr.as_str()],
|
"build",
|
||||||
)
|
"-L",
|
||||||
.await?;
|
"--no-link",
|
||||||
|
"--no-write-lock-file",
|
||||||
|
"--print-out-paths",
|
||||||
|
];
|
||||||
|
build_args.push(flake_attr.as_str());
|
||||||
|
let output = run_command("nix", &build_args).await?;
|
||||||
let path = output
|
let path = output
|
||||||
.lines()
|
.lines()
|
||||||
.find(|line| !line.trim().is_empty())
|
|
||||||
.map(str::trim)
|
.map(str::trim)
|
||||||
|
.find(|line| line.starts_with("/nix/store/"))
|
||||||
.ok_or_else(|| anyhow!("nix build returned no output path"))?;
|
.ok_or_else(|| anyhow!("nix build returned no output path"))?;
|
||||||
Ok(path.to_string())
|
Ok(path.to_string())
|
||||||
}
|
}
|
||||||
|
|
@ -349,7 +467,12 @@ impl Agent {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
run_command(
|
info!(
|
||||||
|
switch_bin = %switch_bin.display(),
|
||||||
|
switch_action = %switch_action,
|
||||||
|
"executing switch-to-configuration"
|
||||||
|
);
|
||||||
|
run_command_inherit_output(
|
||||||
switch_bin
|
switch_bin
|
||||||
.to_str()
|
.to_str()
|
||||||
.ok_or_else(|| anyhow!("invalid switch path"))?,
|
.ok_or_else(|| anyhow!("invalid switch path"))?,
|
||||||
|
|
@ -369,9 +492,15 @@ impl Agent {
|
||||||
return Ok(HealthCheckOutcome::Passed);
|
return Ok(HealthCheckOutcome::Passed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
command = ?desired.health_check_command,
|
||||||
|
rollback_on_failure = desired.rollback_on_failure,
|
||||||
|
"running post-activation health check"
|
||||||
|
);
|
||||||
if let Err(error) = run_vec_command(&desired.health_check_command).await {
|
if let Err(error) = run_vec_command(&desired.health_check_command).await {
|
||||||
let error_message = format!("health check failed after activation: {error}");
|
let error_message = format!("health check failed after activation: {error}");
|
||||||
if desired.rollback_on_failure {
|
if desired.rollback_on_failure {
|
||||||
|
info!("health check failed; rolling back to previous system");
|
||||||
self.rollback_to_previous(previous_system).await?;
|
self.rollback_to_previous(previous_system).await?;
|
||||||
observed.configured_system = read_symlink_target("/nix/var/nix/profiles/system");
|
observed.configured_system = read_symlink_target("/nix/var/nix/profiles/system");
|
||||||
observed.current_system = read_symlink_target("/run/current-system");
|
observed.current_system = read_symlink_target("/run/current-system");
|
||||||
|
|
@ -385,6 +514,7 @@ impl Agent {
|
||||||
return Err(anyhow!(error_message));
|
return Err(anyhow!(error_message));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!("post-activation health check passed");
|
||||||
Ok(HealthCheckOutcome::Passed)
|
Ok(HealthCheckOutcome::Passed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -392,7 +522,42 @@ impl Agent {
|
||||||
let previous_system = previous_system
|
let previous_system = previous_system
|
||||||
.filter(|value| !value.is_empty())
|
.filter(|value| !value.is_empty())
|
||||||
.ok_or_else(|| anyhow!("rollback requested but no previous system is known"))?;
|
.ok_or_else(|| anyhow!("rollback requested but no previous system is known"))?;
|
||||||
self.switch_to_target(previous_system, "switch").await
|
info!(previous_system = %previous_system, "rolling back to previous system");
|
||||||
|
let switch_bin = Path::new(previous_system).join("bin/switch-to-configuration");
|
||||||
|
if switch_bin.exists() {
|
||||||
|
return self.switch_to_target(previous_system, "switch").await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let activate = Path::new(previous_system).join("activate");
|
||||||
|
if !activate.exists() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"previous system {} does not contain switch-to-configuration or activate",
|
||||||
|
previous_system
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
previous_system = %previous_system,
|
||||||
|
activate = %activate.display(),
|
||||||
|
"previous system lacks switch-to-configuration; falling back to profile set + activate"
|
||||||
|
);
|
||||||
|
run_command(
|
||||||
|
"nix-env",
|
||||||
|
&[
|
||||||
|
"--profile",
|
||||||
|
"/nix/var/nix/profiles/system",
|
||||||
|
"--set",
|
||||||
|
previous_system,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
run_command_inherit_output(
|
||||||
|
activate
|
||||||
|
.to_str()
|
||||||
|
.ok_or_else(|| anyhow!("invalid activate path"))?,
|
||||||
|
&[],
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -458,6 +623,8 @@ fn read_symlink_target(path: &str) -> Option<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_command(program: &str, args: &[&str]) -> Result<String> {
|
async fn run_command(program: &str, args: &[&str]) -> Result<String> {
|
||||||
|
let started_at = Instant::now();
|
||||||
|
info!(program = %program, args = ?args, "running command");
|
||||||
let output = Command::new(program)
|
let output = Command::new(program)
|
||||||
.args(args)
|
.args(args)
|
||||||
.stdin(Stdio::null())
|
.stdin(Stdio::null())
|
||||||
|
|
@ -468,10 +635,25 @@ async fn run_command(program: &str, args: &[&str]) -> Result<String> {
|
||||||
.with_context(|| format!("failed to execute {}", program))?;
|
.with_context(|| format!("failed to execute {}", program))?;
|
||||||
|
|
||||||
if output.status.success() {
|
if output.status.success() {
|
||||||
|
info!(
|
||||||
|
program = %program,
|
||||||
|
args = ?args,
|
||||||
|
elapsed_ms = started_at.elapsed().as_millis(),
|
||||||
|
"command completed successfully"
|
||||||
|
);
|
||||||
Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
|
Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
|
||||||
} else {
|
} else {
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
|
let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
|
||||||
let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||||
|
warn!(
|
||||||
|
program = %program,
|
||||||
|
args = ?args,
|
||||||
|
elapsed_ms = started_at.elapsed().as_millis(),
|
||||||
|
status = %output.status,
|
||||||
|
stdout = %stdout,
|
||||||
|
stderr = %stderr,
|
||||||
|
"command failed"
|
||||||
|
);
|
||||||
Err(anyhow!(
|
Err(anyhow!(
|
||||||
"{} {:?} failed with status {}: stdout='{}' stderr='{}'",
|
"{} {:?} failed with status {}: stdout='{}' stderr='{}'",
|
||||||
program,
|
program,
|
||||||
|
|
@ -491,6 +673,47 @@ async fn run_vec_command(command: &[String]) -> Result<String> {
|
||||||
run_command(program, &arg_refs).await
|
run_command(program, &arg_refs).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn run_command_inherit_output(program: &str, args: &[&str]) -> Result<()> {
|
||||||
|
let started_at = Instant::now();
|
||||||
|
info!(
|
||||||
|
program = %program,
|
||||||
|
args = ?args,
|
||||||
|
"running command with inherited output"
|
||||||
|
);
|
||||||
|
let status = Command::new(program)
|
||||||
|
.args(args)
|
||||||
|
.stdin(Stdio::null())
|
||||||
|
.stdout(Stdio::inherit())
|
||||||
|
.stderr(Stdio::inherit())
|
||||||
|
.status()
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("failed to execute {}", program))?;
|
||||||
|
|
||||||
|
if status.success() {
|
||||||
|
info!(
|
||||||
|
program = %program,
|
||||||
|
args = ?args,
|
||||||
|
elapsed_ms = started_at.elapsed().as_millis(),
|
||||||
|
"command completed successfully"
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
warn!(
|
||||||
|
program = %program,
|
||||||
|
args = ?args,
|
||||||
|
elapsed_ms = started_at.elapsed().as_millis(),
|
||||||
|
status = %status,
|
||||||
|
"command failed"
|
||||||
|
);
|
||||||
|
Err(anyhow!(
|
||||||
|
"{} {:?} failed with status {}",
|
||||||
|
program,
|
||||||
|
args,
|
||||||
|
status
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
|
|
@ -543,6 +766,12 @@ mod tests {
|
||||||
}),
|
}),
|
||||||
hardware_facts: None,
|
hardware_facts: None,
|
||||||
state: Some("active".to_string()),
|
state: Some("active".to_string()),
|
||||||
|
commission_state: None,
|
||||||
|
install_state: None,
|
||||||
|
commissioned_at: None,
|
||||||
|
last_inventory_hash: None,
|
||||||
|
power_state: None,
|
||||||
|
bmc_ref: None,
|
||||||
last_heartbeat: None,
|
last_heartbeat: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -568,11 +797,13 @@ mod tests {
|
||||||
fn resolve_desired_system_prefers_chainfire_spec() {
|
fn resolve_desired_system_prefers_chainfire_spec() {
|
||||||
let desired = DesiredSystemSpec {
|
let desired = DesiredSystemSpec {
|
||||||
node_id: "node01".to_string(),
|
node_id: "node01".to_string(),
|
||||||
|
deployment_id: None,
|
||||||
nixos_configuration: Some("node01-next".to_string()),
|
nixos_configuration: Some("node01-next".to_string()),
|
||||||
flake_ref: Some("github:centra/cloud".to_string()),
|
flake_ref: Some("github:centra/cloud".to_string()),
|
||||||
switch_action: Some("boot".to_string()),
|
switch_action: Some("boot".to_string()),
|
||||||
health_check_command: vec!["true".to_string()],
|
health_check_command: vec!["true".to_string()],
|
||||||
rollback_on_failure: Some(true),
|
rollback_on_failure: Some(true),
|
||||||
|
drain_before_apply: Some(false),
|
||||||
};
|
};
|
||||||
|
|
||||||
let resolved = resolve_desired_system(
|
let resolved = resolve_desired_system(
|
||||||
|
|
@ -595,11 +826,13 @@ mod tests {
|
||||||
fn resolve_desired_system_uses_local_health_check_defaults_when_spec_omits_them() {
|
fn resolve_desired_system_uses_local_health_check_defaults_when_spec_omits_them() {
|
||||||
let desired = DesiredSystemSpec {
|
let desired = DesiredSystemSpec {
|
||||||
node_id: "node01".to_string(),
|
node_id: "node01".to_string(),
|
||||||
|
deployment_id: None,
|
||||||
nixos_configuration: Some("node01-next".to_string()),
|
nixos_configuration: Some("node01-next".to_string()),
|
||||||
flake_ref: None,
|
flake_ref: None,
|
||||||
switch_action: None,
|
switch_action: None,
|
||||||
health_check_command: Vec::new(),
|
health_check_command: Vec::new(),
|
||||||
rollback_on_failure: None,
|
rollback_on_failure: None,
|
||||||
|
drain_before_apply: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let resolved = resolve_desired_system(
|
let resolved = resolve_desired_system(
|
||||||
|
|
@ -631,7 +864,10 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn read_symlink_target_returns_none_for_missing_path() {
|
fn read_symlink_target_returns_none_for_missing_path() {
|
||||||
assert_eq!(read_symlink_target("/tmp/photoncloud-nix-agent-missing-link"), None);
|
assert_eq!(
|
||||||
|
read_symlink_target("/tmp/photoncloud-nix-agent-missing-link"),
|
||||||
|
None
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,8 @@ repository.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
chainfire-client.workspace = true
|
||||||
|
chrono.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
tokio.workspace = true
|
tokio.workspace = true
|
||||||
|
|
@ -16,5 +18,6 @@ tracing.workspace = true
|
||||||
tracing-subscriber.workspace = true
|
tracing-subscriber.workspace = true
|
||||||
fiberlb-api.workspace = true
|
fiberlb-api.workspace = true
|
||||||
flashdns-api.workspace = true
|
flashdns-api.workspace = true
|
||||||
|
deployer-types.workspace = true
|
||||||
clap = { version = "4.5", features = ["derive"] }
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
tonic = "0.12"
|
tonic = "0.12"
|
||||||
|
|
|
||||||
823
deployer/crates/plasmacloud-reconciler/src/hosts.rs
Normal file
823
deployer/crates/plasmacloud-reconciler/src/hosts.rs
Normal file
|
|
@ -0,0 +1,823 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use chainfire_client::Client;
|
||||||
|
use chrono::Utc;
|
||||||
|
use clap::Args;
|
||||||
|
use deployer_types::{
|
||||||
|
ClusterNodeRecord, CommissionState, DesiredSystemSpec, HostDeploymentSelector,
|
||||||
|
HostDeploymentSpec, HostDeploymentStatus, InstallState, ObservedSystemState, ServiceInstanceSpec,
|
||||||
|
};
|
||||||
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
|
use std::time::Duration;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
fn cluster_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
|
format!("{}/clusters/{}/", cluster_namespace, cluster_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_node(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}nodes/{}",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
node_id
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_desired_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}nodes/{}/desired-system",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
node_id
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_host_deployment_status(
|
||||||
|
cluster_namespace: &str,
|
||||||
|
cluster_id: &str,
|
||||||
|
deployment_name: &str,
|
||||||
|
) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}deployments/hosts/{}/status",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id),
|
||||||
|
deployment_name
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Args)]
|
||||||
|
pub struct HostsCommand {
|
||||||
|
#[arg(long)]
|
||||||
|
pub endpoint: String,
|
||||||
|
|
||||||
|
#[arg(long, default_value = "photoncloud")]
|
||||||
|
pub cluster_namespace: String,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
pub cluster_id: String,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = 15)]
|
||||||
|
pub interval_secs: u64,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = 300)]
|
||||||
|
pub heartbeat_timeout_secs: u64,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = false)]
|
||||||
|
pub dry_run: bool,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = false)]
|
||||||
|
pub once: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn run(command: HostsCommand) -> Result<()> {
|
||||||
|
let controller = HostDeploymentController::new(command);
|
||||||
|
if controller.once {
|
||||||
|
controller.reconcile_once().await
|
||||||
|
} else {
|
||||||
|
loop {
|
||||||
|
if let Err(error) = controller.reconcile_once().await {
|
||||||
|
warn!(error = %error, "host deployment reconciliation failed");
|
||||||
|
}
|
||||||
|
sleep(controller.interval).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct HostDeploymentController {
|
||||||
|
endpoint: String,
|
||||||
|
cluster_namespace: String,
|
||||||
|
cluster_id: String,
|
||||||
|
interval: Duration,
|
||||||
|
heartbeat_timeout_secs: u64,
|
||||||
|
dry_run: bool,
|
||||||
|
once: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HostDeploymentController {
|
||||||
|
fn new(command: HostsCommand) -> Self {
|
||||||
|
Self {
|
||||||
|
endpoint: command.endpoint,
|
||||||
|
cluster_namespace: command.cluster_namespace,
|
||||||
|
cluster_id: command.cluster_id,
|
||||||
|
interval: Duration::from_secs(command.interval_secs),
|
||||||
|
heartbeat_timeout_secs: command.heartbeat_timeout_secs,
|
||||||
|
dry_run: command.dry_run,
|
||||||
|
once: command.once,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn reconcile_once(&self) -> Result<()> {
|
||||||
|
let mut client = Client::connect(self.endpoint.clone()).await?;
|
||||||
|
let nodes = self.load_nodes(&mut client).await?;
|
||||||
|
let desired_systems = self.load_desired_systems(&mut client).await?;
|
||||||
|
let observed_systems = self.load_observed_systems(&mut client).await?;
|
||||||
|
let instances = self.load_instances(&mut client).await?;
|
||||||
|
let deployments = self.load_host_deployments(&mut client).await?;
|
||||||
|
let statuses = self.load_host_deployment_statuses(&mut client).await?;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
nodes = nodes.len(),
|
||||||
|
deployments = deployments.len(),
|
||||||
|
instances = instances.len(),
|
||||||
|
"loaded host deployment inputs"
|
||||||
|
);
|
||||||
|
|
||||||
|
for deployment in deployments {
|
||||||
|
let existing_status = statuses.get(&deployment.name).cloned();
|
||||||
|
let plan = plan_host_deployment(
|
||||||
|
&deployment,
|
||||||
|
existing_status.as_ref(),
|
||||||
|
&nodes,
|
||||||
|
&desired_systems,
|
||||||
|
&observed_systems,
|
||||||
|
&instances,
|
||||||
|
self.heartbeat_timeout_secs,
|
||||||
|
);
|
||||||
|
|
||||||
|
if self.dry_run {
|
||||||
|
info!(
|
||||||
|
deployment = %deployment.name,
|
||||||
|
phase = plan.status.phase.as_deref().unwrap_or("unknown"),
|
||||||
|
desired_upserts = plan.desired_upserts.len(),
|
||||||
|
desired_deletes = plan.desired_deletes.len(),
|
||||||
|
node_updates = plan.node_updates.len(),
|
||||||
|
"would reconcile host deployment"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for desired in &plan.desired_upserts {
|
||||||
|
client
|
||||||
|
.put(
|
||||||
|
&key_desired_system(
|
||||||
|
&self.cluster_namespace,
|
||||||
|
&self.cluster_id,
|
||||||
|
&desired.node_id,
|
||||||
|
),
|
||||||
|
&serde_json::to_vec(desired)?,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
for node_id in &plan.desired_deletes {
|
||||||
|
client
|
||||||
|
.delete(&key_desired_system(
|
||||||
|
&self.cluster_namespace,
|
||||||
|
&self.cluster_id,
|
||||||
|
node_id,
|
||||||
|
))
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
for node in plan.node_updates.values() {
|
||||||
|
client
|
||||||
|
.put(
|
||||||
|
&key_node(&self.cluster_namespace, &self.cluster_id, &node.node_id),
|
||||||
|
&serde_json::to_vec(node)?,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
client
|
||||||
|
.put(
|
||||||
|
&key_host_deployment_status(
|
||||||
|
&self.cluster_namespace,
|
||||||
|
&self.cluster_id,
|
||||||
|
&deployment.name,
|
||||||
|
),
|
||||||
|
&serde_json::to_vec(&plan.status)?,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_nodes(&self, client: &mut Client) -> Result<Vec<ClusterNodeRecord>> {
|
||||||
|
let prefix = format!(
|
||||||
|
"{}nodes/",
|
||||||
|
cluster_prefix(&self.cluster_namespace, &self.cluster_id)
|
||||||
|
);
|
||||||
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
||||||
|
let mut nodes = Vec::new();
|
||||||
|
|
||||||
|
for (key, value) in kvs {
|
||||||
|
let key = String::from_utf8_lossy(&key);
|
||||||
|
let Some(suffix) = key.strip_prefix(&prefix) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
if suffix.contains('/') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match serde_json::from_slice::<ClusterNodeRecord>(&value) {
|
||||||
|
Ok(node) => nodes.push(node),
|
||||||
|
Err(error) => warn!(error = %error, key = %key, "failed to decode cluster node"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes.sort_by(|lhs, rhs| lhs.node_id.cmp(&rhs.node_id));
|
||||||
|
Ok(nodes)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_desired_systems(
|
||||||
|
&self,
|
||||||
|
client: &mut Client,
|
||||||
|
) -> Result<HashMap<String, DesiredSystemSpec>> {
|
||||||
|
let prefix = format!(
|
||||||
|
"{}nodes/",
|
||||||
|
cluster_prefix(&self.cluster_namespace, &self.cluster_id)
|
||||||
|
);
|
||||||
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
||||||
|
let mut desired = HashMap::new();
|
||||||
|
|
||||||
|
for (key, value) in kvs {
|
||||||
|
let key = String::from_utf8_lossy(&key);
|
||||||
|
if !key.ends_with("/desired-system") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match serde_json::from_slice::<DesiredSystemSpec>(&value) {
|
||||||
|
Ok(spec) => {
|
||||||
|
desired.insert(spec.node_id.clone(), spec);
|
||||||
|
}
|
||||||
|
Err(error) => warn!(error = %error, key = %key, "failed to decode desired-system"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(desired)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_observed_systems(
|
||||||
|
&self,
|
||||||
|
client: &mut Client,
|
||||||
|
) -> Result<HashMap<String, ObservedSystemState>> {
|
||||||
|
let prefix = format!(
|
||||||
|
"{}nodes/",
|
||||||
|
cluster_prefix(&self.cluster_namespace, &self.cluster_id)
|
||||||
|
);
|
||||||
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
||||||
|
let mut observed = HashMap::new();
|
||||||
|
|
||||||
|
for (key, value) in kvs {
|
||||||
|
let key = String::from_utf8_lossy(&key);
|
||||||
|
if !key.ends_with("/observed-system") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match serde_json::from_slice::<ObservedSystemState>(&value) {
|
||||||
|
Ok(state) => {
|
||||||
|
observed.insert(state.node_id.clone(), state);
|
||||||
|
}
|
||||||
|
Err(error) => warn!(error = %error, key = %key, "failed to decode observed-system"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(observed)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_instances(&self, client: &mut Client) -> Result<Vec<ServiceInstanceSpec>> {
|
||||||
|
let prefix = format!(
|
||||||
|
"{}instances/",
|
||||||
|
cluster_prefix(&self.cluster_namespace, &self.cluster_id)
|
||||||
|
);
|
||||||
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
||||||
|
let mut instances = Vec::new();
|
||||||
|
|
||||||
|
for (key, value) in kvs {
|
||||||
|
let key = String::from_utf8_lossy(&key);
|
||||||
|
match serde_json::from_slice::<ServiceInstanceSpec>(&value) {
|
||||||
|
Ok(instance) => instances.push(instance),
|
||||||
|
Err(error) => warn!(error = %error, key = %key, "failed to decode service instance"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(instances)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_host_deployments(&self, client: &mut Client) -> Result<Vec<HostDeploymentSpec>> {
|
||||||
|
let prefix = format!(
|
||||||
|
"{}deployments/hosts/",
|
||||||
|
cluster_prefix(&self.cluster_namespace, &self.cluster_id)
|
||||||
|
);
|
||||||
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
||||||
|
let mut deployments = Vec::new();
|
||||||
|
|
||||||
|
for (key, value) in kvs {
|
||||||
|
let key = String::from_utf8_lossy(&key);
|
||||||
|
if !key.ends_with("/spec") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match serde_json::from_slice::<HostDeploymentSpec>(&value) {
|
||||||
|
Ok(spec) => deployments.push(spec),
|
||||||
|
Err(error) => warn!(error = %error, key = %key, "failed to decode host deployment"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deployments.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
|
||||||
|
Ok(deployments)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_host_deployment_statuses(
|
||||||
|
&self,
|
||||||
|
client: &mut Client,
|
||||||
|
) -> Result<HashMap<String, HostDeploymentStatus>> {
|
||||||
|
let prefix = format!(
|
||||||
|
"{}deployments/hosts/",
|
||||||
|
cluster_prefix(&self.cluster_namespace, &self.cluster_id)
|
||||||
|
);
|
||||||
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
||||||
|
let mut statuses = HashMap::new();
|
||||||
|
|
||||||
|
for (key, value) in kvs {
|
||||||
|
let key = String::from_utf8_lossy(&key);
|
||||||
|
if !key.ends_with("/status") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match serde_json::from_slice::<HostDeploymentStatus>(&value) {
|
||||||
|
Ok(status) => {
|
||||||
|
statuses.insert(status.name.clone(), status);
|
||||||
|
}
|
||||||
|
Err(error) => warn!(error = %error, key = %key, "failed to decode host deployment status"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(statuses)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result of one `plan_host_deployment` pass for a single deployment:
/// the recomputed status plus the storage mutations needed to advance it.
#[derive(Debug, Default)]
struct HostDeploymentPlan {
    /// Recomputed deployment status to write back to the status key.
    status: HostDeploymentStatus,
    /// Per-node desired-system documents to write (rollout starts).
    desired_upserts: Vec<DesiredSystemSpec>,
    /// Node ids whose desired-system should be deleted (node no longer
    /// matched by this deployment's selector).
    desired_deletes: Vec<String>,
    /// Node records with changed `state` (e.g. active <-> draining),
    /// keyed by node id; BTreeMap keeps write order deterministic.
    node_updates: BTreeMap<String, ClusterNodeRecord>,
}
|
||||||
|
|
||||||
|
/// Computes one reconcile step for a host deployment.
///
/// Pure function over the current cluster view: classifies each selected
/// node as completed / failed / in-progress / eligible, then starts new
/// rollouts within the `batch_size` and `max_unavailable` budgets, and
/// derives the new deployment status. Nothing is written here — the caller
/// persists the returned plan.
///
/// * `existing_status` — previous status record, if any (carries the
///   operator pause flag forward).
/// * `heartbeat_timeout_secs` — max heartbeat age for a node to be a
///   rollout candidate; 0 disables the check (see
///   `node_is_rollout_candidate`).
fn plan_host_deployment(
    deployment: &HostDeploymentSpec,
    existing_status: Option<&HostDeploymentStatus>,
    nodes: &[ClusterNodeRecord],
    desired_systems: &HashMap<String, DesiredSystemSpec>,
    observed_systems: &HashMap<String, ObservedSystemState>,
    instances: &[ServiceInstanceSpec],
    heartbeat_timeout_secs: u64,
) -> HostDeploymentPlan {
    let now = Utc::now();
    let target_configuration = deployment.nixos_configuration.clone();
    let selector_matches = select_nodes(nodes, &deployment.selector);
    let selected_node_ids = selector_matches
        .iter()
        .map(|node| node.node_id.clone())
        .collect::<HashSet<_>>();
    let instance_counts = active_instances_per_node(instances);
    let mut completed = Vec::new();
    let mut in_progress = Vec::new();
    let mut failed = Vec::new();
    let mut eligible_candidates = Vec::new();
    let mut desired_upserts = Vec::new();
    let mut node_updates = BTreeMap::new();
    // Budgets default to 1 and are clamped to at least 1.
    let batch_size = deployment.batch_size.unwrap_or(1).max(1) as usize;
    let max_unavailable = deployment.max_unavailable.unwrap_or(1).max(1) as usize;
    // Operator pause is sticky: it lives on the stored status, not the spec.
    let operator_paused = existing_status
        .map(|status| status.paused_by_operator)
        .unwrap_or(false);
    let spec_paused = deployment.paused.unwrap_or(false);
    // Garbage-collect desired-systems this deployment owns on nodes it no
    // longer selects.
    let mut desired_deletes = desired_systems
        .iter()
        .filter(|(node_id, desired)| {
            desired.deployment_id.as_deref() == Some(deployment.name.as_str())
                && !selected_node_ids.contains(node_id.as_str())
        })
        .map(|(node_id, _)| node_id.clone())
        .collect::<Vec<_>>();

    // Classify every selected node into exactly one bucket.
    for node in &selector_matches {
        let desired = desired_systems.get(&node.node_id);
        let observed = observed_systems.get(&node.node_id);
        let is_completed =
            is_node_completed(deployment, node, desired, observed, target_configuration.as_deref());
        let is_failed = is_node_failed(deployment, desired, observed);
        // A draining node that still hosts active instances also counts as
        // in-progress when the deployment drains before applying.
        let is_in_progress = is_node_in_progress(deployment, desired, observed, is_completed, is_failed)
            || (deployment.drain_before_apply == Some(true)
                && node.state.as_deref() == Some("draining")
                && instance_counts.get(&node.node_id).copied().unwrap_or_default() > 0);

        if is_completed {
            completed.push(node.node_id.clone());
            // Undrain a node once its rollout finished.
            if deployment.drain_before_apply == Some(true) && node.state.as_deref() == Some("draining")
            {
                let mut updated = (*node).clone();
                updated.state = Some("active".to_string());
                node_updates.insert(updated.node_id.clone(), updated);
            }
            continue;
        }

        if is_failed {
            failed.push(node.node_id.clone());
            continue;
        }

        if is_in_progress {
            in_progress.push(node.node_id.clone());
            continue;
        }

        // Healthy, not yet touched by this rollout: candidate for starting.
        if node_is_rollout_candidate(node, heartbeat_timeout_secs) {
            eligible_candidates.push((*node).clone());
        }
    }

    let unavailable = in_progress.len() + failed.len();
    // Any failure auto-pauses the rollout, in addition to manual pauses.
    let paused = operator_paused || spec_paused || !failed.is_empty();
    let remaining_unavailable_budget = max_unavailable.saturating_sub(unavailable);
    let remaining_batch_budget = batch_size.saturating_sub(in_progress.len());
    // Without a target configuration nothing can be started.
    let max_starts = if deployment.nixos_configuration.is_some() {
        remaining_unavailable_budget.min(remaining_batch_budget)
    } else {
        0
    };
    let mut planned = 0usize;
    let mut newly_started = Vec::new();

    if !paused && max_starts > 0 {
        // Candidates are already sorted by node id (via select_nodes), so
        // starts are deterministic.
        for node in eligible_candidates {
            if planned >= max_starts {
                break;
            }

            let remaining_instances = instance_counts.get(&node.node_id).copied().unwrap_or_default();
            // Drain-first: mark the node draining and consume a slot, but
            // defer writing the desired-system until instances are gone.
            if deployment.drain_before_apply == Some(true) && remaining_instances > 0 {
                let mut updated = node.clone();
                updated.state = Some("draining".to_string());
                node_updates.insert(updated.node_id.clone(), updated);
                in_progress.push(node.node_id.clone());
                newly_started.push(node.node_id.clone());
                planned += 1;
                continue;
            }

            // Node is drained (or draining not requested): write the
            // desired-system that the node agent will apply.
            let desired = DesiredSystemSpec {
                node_id: node.node_id.clone(),
                deployment_id: Some(deployment.name.clone()),
                nixos_configuration: deployment.nixos_configuration.clone(),
                flake_ref: deployment.flake_ref.clone(),
                switch_action: deployment.switch_action.clone().or_else(|| Some("switch".to_string())),
                health_check_command: deployment.health_check_command.clone(),
                rollback_on_failure: Some(deployment.rollback_on_failure.unwrap_or(true)),
                drain_before_apply: Some(deployment.drain_before_apply.unwrap_or(false)),
            };
            newly_started.push(node.node_id.clone());
            in_progress.push(node.node_id.clone());
            planned += 1;
            // Keep the node draining while the switch runs so the scheduler
            // does not place new work on it mid-apply.
            if deployment.drain_before_apply == Some(true) && node.state.as_deref() != Some("draining")
            {
                let mut updated = node.clone();
                updated.state = Some("draining".to_string());
                node_updates.insert(updated.node_id.clone(), updated);
            }
            desired_upserts.push(desired);
        }
    }

    // Rebuild the status record from this pass's classification.
    let mut status = existing_status.cloned().unwrap_or_default();
    status.name = deployment.name.clone();
    status.selected_nodes = selector_matches.iter().map(|node| node.node_id.clone()).collect();
    status.completed_nodes = dedup_sorted(completed);
    status.in_progress_nodes = dedup_sorted(in_progress);
    status.failed_nodes = dedup_sorted(failed);
    status.paused_by_operator = operator_paused;
    status.paused = paused;
    // Phase ladder — order matters: idle > invalid > paused > completed >
    // running > ready.
    status.phase = Some(if status.selected_nodes.is_empty() {
        "idle"
    } else if deployment.nixos_configuration.is_none() {
        "invalid"
    } else if status.paused {
        "paused"
    } else if status.completed_nodes.len() == status.selected_nodes.len() {
        "completed"
    } else if !newly_started.is_empty() || !status.in_progress_nodes.is_empty() {
        "running"
    } else {
        "ready"
    }
    .to_string());
    status.message = Some(format!(
        "selected={} completed={} in_progress={} failed={} newly_started={}",
        status.selected_nodes.len(),
        status.completed_nodes.len(),
        status.in_progress_nodes.len(),
        status.failed_nodes.len(),
        newly_started.len()
    ));
    status.updated_at = Some(now);

    HostDeploymentPlan {
        status,
        desired_upserts,
        desired_deletes: {
            desired_deletes.sort();
            desired_deletes.dedup();
            desired_deletes
        },
        node_updates,
    }
}
|
||||||
|
|
||||||
|
fn select_nodes<'a>(
|
||||||
|
nodes: &'a [ClusterNodeRecord],
|
||||||
|
selector: &HostDeploymentSelector,
|
||||||
|
) -> Vec<&'a ClusterNodeRecord> {
|
||||||
|
let explicit_nodes = selector.node_ids.iter().collect::<HashSet<_>>();
|
||||||
|
let explicit_mode = !explicit_nodes.is_empty();
|
||||||
|
let mut selected = nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|node| {
|
||||||
|
(!explicit_mode || explicit_nodes.contains(&node.node_id))
|
||||||
|
&& (selector.roles.is_empty()
|
||||||
|
|| node
|
||||||
|
.roles
|
||||||
|
.iter()
|
||||||
|
.any(|role| selector.roles.iter().any(|expected| expected == role)))
|
||||||
|
&& (selector.pools.is_empty()
|
||||||
|
|| node
|
||||||
|
.pool
|
||||||
|
.as_deref()
|
||||||
|
.map(|pool| selector.pools.iter().any(|expected| expected == pool))
|
||||||
|
.unwrap_or(false))
|
||||||
|
&& (selector.node_classes.is_empty()
|
||||||
|
|| node
|
||||||
|
.node_class
|
||||||
|
.as_deref()
|
||||||
|
.map(|node_class| {
|
||||||
|
selector
|
||||||
|
.node_classes
|
||||||
|
.iter()
|
||||||
|
.any(|expected| expected == node_class)
|
||||||
|
})
|
||||||
|
.unwrap_or(false))
|
||||||
|
&& selector
|
||||||
|
.match_labels
|
||||||
|
.iter()
|
||||||
|
.all(|(key, value)| node.labels.get(key) == Some(value))
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
selected.sort_by(|lhs, rhs| lhs.node_id.cmp(&rhs.node_id));
|
||||||
|
selected
|
||||||
|
}
|
||||||
|
|
||||||
|
fn active_instances_per_node(instances: &[ServiceInstanceSpec]) -> HashMap<String, usize> {
|
||||||
|
let mut counts = HashMap::new();
|
||||||
|
for instance in instances {
|
||||||
|
if matches!(instance.state.as_deref(), Some("failed") | Some("deleted")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
*counts.entry(instance.node_id.clone()).or_insert(0usize) += 1;
|
||||||
|
}
|
||||||
|
counts
|
||||||
|
}
|
||||||
|
|
||||||
|
fn node_is_rollout_candidate(node: &ClusterNodeRecord, heartbeat_timeout_secs: u64) -> bool {
|
||||||
|
if matches!(
|
||||||
|
node.commission_state,
|
||||||
|
Some(CommissionState::Discovered | CommissionState::Commissioning)
|
||||||
|
) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if matches!(
|
||||||
|
node.install_state,
|
||||||
|
Some(
|
||||||
|
InstallState::Installing | InstallState::Failed | InstallState::ReinstallRequested
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if !matches!(node.state.as_deref(), Some("active") | Some("draining")) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if heartbeat_timeout_secs == 0 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
let Some(last) = node.last_heartbeat else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
Utc::now().signed_duration_since(last).num_seconds() <= heartbeat_timeout_secs as i64
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_node_completed(
|
||||||
|
deployment: &HostDeploymentSpec,
|
||||||
|
_node: &ClusterNodeRecord,
|
||||||
|
desired: Option<&DesiredSystemSpec>,
|
||||||
|
observed: Option<&ObservedSystemState>,
|
||||||
|
target_configuration: Option<&str>,
|
||||||
|
) -> bool {
|
||||||
|
observed
|
||||||
|
.filter(|observed| observed.status.as_deref() == Some("active"))
|
||||||
|
.and_then(|observed| observed.nixos_configuration.as_deref())
|
||||||
|
.zip(target_configuration)
|
||||||
|
.map(|(observed_configuration, target)| observed_configuration == target)
|
||||||
|
.unwrap_or(false)
|
||||||
|
&& desired
|
||||||
|
.and_then(|desired| desired.deployment_id.as_deref())
|
||||||
|
.map(|deployment_id| deployment_id == deployment.name)
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_node_failed(
|
||||||
|
deployment: &HostDeploymentSpec,
|
||||||
|
desired: Option<&DesiredSystemSpec>,
|
||||||
|
observed: Option<&ObservedSystemState>,
|
||||||
|
) -> bool {
|
||||||
|
desired
|
||||||
|
.and_then(|desired| desired.deployment_id.as_deref())
|
||||||
|
.map(|deployment_id| deployment_id == deployment.name)
|
||||||
|
.unwrap_or(false)
|
||||||
|
&& observed
|
||||||
|
.and_then(|observed| observed.status.as_deref())
|
||||||
|
.map(|status| matches!(status, "failed" | "rolled-back"))
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_node_in_progress(
|
||||||
|
deployment: &HostDeploymentSpec,
|
||||||
|
desired: Option<&DesiredSystemSpec>,
|
||||||
|
observed: Option<&ObservedSystemState>,
|
||||||
|
is_completed: bool,
|
||||||
|
is_failed: bool,
|
||||||
|
) -> bool {
|
||||||
|
if is_completed || is_failed {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
desired
|
||||||
|
.and_then(|desired| desired.deployment_id.as_deref())
|
||||||
|
.map(|deployment_id| deployment_id == deployment.name)
|
||||||
|
.unwrap_or(false)
|
||||||
|
|| observed
|
||||||
|
.and_then(|observed| observed.status.as_deref())
|
||||||
|
.map(|status| matches!(status, "planning" | "pending" | "reconciling" | "verifying" | "staged"))
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sorts the ids and drops adjacent duplicates, yielding a canonical list.
/// `sort_unstable` is safe here: equal `String`s are indistinguishable.
fn dedup_sorted(mut values: Vec<String>) -> Vec<String> {
    values.sort_unstable();
    values.dedup();
    values
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a healthy, fully commissioned worker node that matches the
    /// selector produced by `test_deployment` (role/pool/class/label) and
    /// carries a fresh heartbeat so it passes the rollout-candidate check.
    fn test_node(node_id: &str, failure_domain: &str) -> ClusterNodeRecord {
        ClusterNodeRecord {
            node_id: node_id.to_string(),
            machine_id: None,
            ip: "10.0.0.1".to_string(),
            hostname: node_id.to_string(),
            roles: vec!["worker".to_string()],
            labels: HashMap::from([
                ("tier".to_string(), "general".to_string()),
                ("failure_domain".to_string(), failure_domain.to_string()),
            ]),
            pool: Some("general".to_string()),
            node_class: Some("worker-linux".to_string()),
            failure_domain: Some(failure_domain.to_string()),
            nix_profile: None,
            install_plan: None,
            hardware_facts: None,
            state: Some("active".to_string()),
            commission_state: Some(CommissionState::Commissioned),
            install_state: Some(InstallState::Installed),
            commissioned_at: None,
            last_inventory_hash: None,
            power_state: None,
            bmc_ref: None,
            // Fresh heartbeat keeps the node eligible under any timeout.
            last_heartbeat: Some(Utc::now()),
        }
    }

    /// Deployment fixture: batch_size = max_unavailable = 1, drain disabled,
    /// not paused, targeting the "worker-golden" configuration.
    fn test_deployment() -> HostDeploymentSpec {
        HostDeploymentSpec {
            name: "worker-rollout".to_string(),
            selector: HostDeploymentSelector {
                node_ids: vec![],
                roles: vec!["worker".to_string()],
                pools: vec!["general".to_string()],
                node_classes: vec!["worker-linux".to_string()],
                match_labels: HashMap::from([("tier".to_string(), "general".to_string())]),
            },
            nixos_configuration: Some("worker-golden".to_string()),
            flake_ref: Some("/opt/plasmacloud-src".to_string()),
            batch_size: Some(1),
            max_unavailable: Some(1),
            health_check_command: vec!["true".to_string()],
            switch_action: Some("switch".to_string()),
            rollback_on_failure: Some(true),
            drain_before_apply: Some(false),
            reboot_policy: None,
            paused: Some(false),
        }
    }

    /// With two matching nodes and batch_size 1, only the first node (lowest
    /// id) starts and the deployment reports "running".
    #[test]
    fn plan_rollout_starts_one_node_per_batch() {
        let deployment = test_deployment();
        let nodes = vec![test_node("node01", "rack-a"), test_node("node02", "rack-b")];
        let plan = plan_host_deployment(
            &deployment,
            None,
            &nodes,
            &HashMap::new(),
            &HashMap::new(),
            &[],
            300,
        );

        assert_eq!(plan.desired_upserts.len(), 1);
        assert_eq!(plan.status.in_progress_nodes, vec!["node01".to_string()]);
        assert_eq!(plan.status.phase.as_deref(), Some("running"));
    }

    /// A node that rolled back pauses the whole rollout: no further starts,
    /// and the node is reported in `failed_nodes`.
    #[test]
    fn plan_rollout_pauses_on_failed_node() {
        let deployment = test_deployment();
        let nodes = vec![test_node("node01", "rack-a"), test_node("node02", "rack-b")];
        // node01's desired-system is owned by this deployment...
        let desired = HashMap::from([(
            "node01".to_string(),
            DesiredSystemSpec {
                node_id: "node01".to_string(),
                deployment_id: Some("worker-rollout".to_string()),
                nixos_configuration: Some("worker-golden".to_string()),
                flake_ref: None,
                switch_action: Some("switch".to_string()),
                health_check_command: Vec::new(),
                rollback_on_failure: Some(true),
                drain_before_apply: Some(false),
            },
        )]);
        // ...but the node reports a terminal "rolled-back" status.
        let observed = HashMap::from([(
            "node01".to_string(),
            ObservedSystemState {
                node_id: "node01".to_string(),
                nixos_configuration: Some("worker-golden".to_string()),
                status: Some("rolled-back".to_string()),
                ..ObservedSystemState::default()
            },
        )]);

        let plan = plan_host_deployment(
            &deployment,
            None,
            &nodes,
            &desired,
            &observed,
            &[],
            300,
        );

        assert!(plan.desired_upserts.is_empty());
        assert!(plan.status.paused);
        assert_eq!(plan.status.failed_nodes, vec!["node01".to_string()]);
    }

    /// With drain_before_apply set and a live instance on the node, the
    /// planner marks the node draining and defers the desired-system write.
    #[test]
    fn plan_rollout_drains_before_apply_when_instances_exist() {
        let mut deployment = test_deployment();
        deployment.drain_before_apply = Some(true);
        let nodes = vec![test_node("node01", "rack-a")];
        // One active instance keeps the node "not yet drained".
        let instances = vec![ServiceInstanceSpec {
            instance_id: "api-node01".to_string(),
            service: "api".to_string(),
            node_id: "node01".to_string(),
            ip: "10.0.0.1".to_string(),
            port: 8080,
            mesh_port: None,
            version: None,
            health_check: None,
            process: None,
            container: None,
            managed_by: Some("fleet-scheduler".to_string()),
            state: Some("active".to_string()),
            last_heartbeat: None,
            observed_at: None,
        }];

        let plan = plan_host_deployment(
            &deployment,
            None,
            &nodes,
            &HashMap::new(),
            &HashMap::new(),
            &instances,
            300,
        );

        assert!(plan.desired_upserts.is_empty());
        assert_eq!(
            plan.node_updates
                .get("node01")
                .and_then(|node| node.state.as_deref()),
            Some("draining")
        );
        assert_eq!(plan.status.in_progress_nodes, vec!["node01".to_string()]);
    }
}
|
||||||
|
|
@ -29,9 +29,9 @@ use fiberlb_api::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use flashdns_api::RecordServiceClient;
|
use flashdns_api::RecordServiceClient;
|
||||||
use flashdns_api::ReverseZoneServiceClient;
|
|
||||||
use flashdns_api::ZoneServiceClient;
|
use flashdns_api::ZoneServiceClient;
|
||||||
use flashdns_api::proto::{
|
use flashdns_api::proto::{
|
||||||
|
reverse_zone_service_client::ReverseZoneServiceClient,
|
||||||
record_data, ARecord, AaaaRecord, CaaRecord, CnameRecord, CreateRecordRequest,
|
record_data, ARecord, AaaaRecord, CaaRecord, CnameRecord, CreateRecordRequest,
|
||||||
CreateReverseZoneRequest, CreateZoneRequest, DeleteRecordRequest, DeleteReverseZoneRequest,
|
CreateReverseZoneRequest, CreateZoneRequest, DeleteRecordRequest, DeleteReverseZoneRequest,
|
||||||
DeleteZoneRequest, ListReverseZonesRequest, MxRecord, NsRecord, PtrRecord, RecordData,
|
DeleteZoneRequest, ListReverseZonesRequest, MxRecord, NsRecord, PtrRecord, RecordData,
|
||||||
|
|
@ -39,6 +39,8 @@ use flashdns_api::proto::{
|
||||||
ZoneInfo,
|
ZoneInfo,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
mod hosts;
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[command(author, version, about)]
|
#[command(author, version, about)]
|
||||||
struct Cli {
|
struct Cli {
|
||||||
|
|
@ -71,6 +73,9 @@ enum Command {
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
prune: bool,
|
prune: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Reconcile host deployments into per-node desired-system state
|
||||||
|
Hosts(hosts::HostsCommand),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
|
|
@ -294,6 +299,9 @@ async fn main() -> Result<()> {
|
||||||
let spec: DnsConfig = read_json(&config).await?;
|
let spec: DnsConfig = read_json(&config).await?;
|
||||||
reconcile_dns(spec, endpoint, prune).await?;
|
reconcile_dns(spec, endpoint, prune).await?;
|
||||||
}
|
}
|
||||||
|
Command::Hosts(command) => {
|
||||||
|
hosts::run(command).await?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,30 @@ if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then
|
||||||
exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@"
|
exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
run_chainfire_server_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_CHAINFIRE_SERVER_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
run_deployer_server_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_DEPLOYER_SERVER_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_DEPLOYER_SERVER_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-server -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
run_deployer_ctl_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_DEPLOYER_CTL_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_DEPLOYER_CTL_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
tmp_dir="$(mktemp -d)"
|
tmp_dir="$(mktemp -d)"
|
||||||
cf_pid=""
|
cf_pid=""
|
||||||
deployer_pid=""
|
deployer_pid=""
|
||||||
|
|
@ -128,7 +152,7 @@ role = "voter"
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
echo "Starting ChainFire on 127.0.0.1:${api_port}"
|
echo "Starting ChainFire on 127.0.0.1:${api_port}"
|
||||||
cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- \
|
run_chainfire_server_bin \
|
||||||
--config "$tmp_dir/chainfire.toml" \
|
--config "$tmp_dir/chainfire.toml" \
|
||||||
>"$tmp_dir/chainfire.log" 2>&1 &
|
>"$tmp_dir/chainfire.log" 2>&1 &
|
||||||
cf_pid="$!"
|
cf_pid="$!"
|
||||||
|
|
@ -155,7 +179,7 @@ namespace = "deployer"
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
echo "Starting Deployer on 127.0.0.1:${deployer_port}"
|
echo "Starting Deployer on 127.0.0.1:${deployer_port}"
|
||||||
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-server -- \
|
run_deployer_server_bin \
|
||||||
--config "$tmp_dir/deployer.toml" \
|
--config "$tmp_dir/deployer.toml" \
|
||||||
>"$tmp_dir/deployer.log" 2>&1 &
|
>"$tmp_dir/deployer.log" 2>&1 &
|
||||||
deployer_pid="$!"
|
deployer_pid="$!"
|
||||||
|
|
@ -240,7 +264,7 @@ chainfire_endpoint="http://127.0.0.1:${api_port}"
|
||||||
deployer_endpoint="http://127.0.0.1:${deployer_port}"
|
deployer_endpoint="http://127.0.0.1:${deployer_port}"
|
||||||
|
|
||||||
run_deployer_ctl() {
|
run_deployer_ctl() {
|
||||||
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- \
|
run_deployer_ctl_bin \
|
||||||
--chainfire-endpoint "$chainfire_endpoint" \
|
--chainfire-endpoint "$chainfire_endpoint" \
|
||||||
--cluster-id test-cluster \
|
--cluster-id test-cluster \
|
||||||
--cluster-namespace photoncloud \
|
--cluster-namespace photoncloud \
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,38 @@ if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then
|
||||||
exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@"
|
exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
run_chainfire_server_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_CHAINFIRE_SERVER_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
run_deployer_ctl_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_DEPLOYER_CTL_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_DEPLOYER_CTL_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
run_node_agent_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_NODE_AGENT_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_NODE_AGENT_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p node-agent -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
run_fleet_scheduler_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_FLEET_SCHEDULER_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_FLEET_SCHEDULER_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p fleet-scheduler -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
tmp_dir="$(mktemp -d)"
|
tmp_dir="$(mktemp -d)"
|
||||||
cf_pid=""
|
cf_pid=""
|
||||||
|
|
||||||
|
|
@ -104,7 +136,7 @@ EOF
|
||||||
mkdir -p "$tmp_dir/pids"
|
mkdir -p "$tmp_dir/pids"
|
||||||
|
|
||||||
echo "Starting ChainFire on 127.0.0.1:${api_port}"
|
echo "Starting ChainFire on 127.0.0.1:${api_port}"
|
||||||
cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- \
|
run_chainfire_server_bin \
|
||||||
--config "$tmp_dir/chainfire.toml" \
|
--config "$tmp_dir/chainfire.toml" \
|
||||||
>"$tmp_dir/chainfire.log" 2>&1 &
|
>"$tmp_dir/chainfire.log" 2>&1 &
|
||||||
cf_pid="$!"
|
cf_pid="$!"
|
||||||
|
|
@ -256,7 +288,7 @@ EOF
|
||||||
endpoint="http://127.0.0.1:${api_port}"
|
endpoint="http://127.0.0.1:${api_port}"
|
||||||
|
|
||||||
run_deployer_ctl() {
|
run_deployer_ctl() {
|
||||||
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- \
|
run_deployer_ctl_bin \
|
||||||
--chainfire-endpoint "$endpoint" \
|
--chainfire-endpoint "$endpoint" \
|
||||||
--cluster-id test-cluster \
|
--cluster-id test-cluster \
|
||||||
"$@"
|
"$@"
|
||||||
|
|
@ -266,7 +298,7 @@ run_node_agent_once() {
|
||||||
local node_id="$1"
|
local node_id="$1"
|
||||||
local pid_dir="$tmp_dir/pids/$node_id"
|
local pid_dir="$tmp_dir/pids/$node_id"
|
||||||
mkdir -p "$pid_dir"
|
mkdir -p "$pid_dir"
|
||||||
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p node-agent -- \
|
run_node_agent_bin \
|
||||||
--chainfire-endpoint "$endpoint" \
|
--chainfire-endpoint "$endpoint" \
|
||||||
--cluster-id test-cluster \
|
--cluster-id test-cluster \
|
||||||
--node-id "$node_id" \
|
--node-id "$node_id" \
|
||||||
|
|
@ -277,7 +309,7 @@ run_node_agent_once() {
|
||||||
}
|
}
|
||||||
|
|
||||||
run_scheduler_once() {
|
run_scheduler_once() {
|
||||||
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p fleet-scheduler -- \
|
run_fleet_scheduler_bin \
|
||||||
--chainfire-endpoint "$endpoint" \
|
--chainfire-endpoint "$endpoint" \
|
||||||
--cluster-id test-cluster \
|
--cluster-id test-cluster \
|
||||||
--interval-secs 1 \
|
--interval-secs 1 \
|
||||||
|
|
|
||||||
431
deployer/scripts/verify-host-lifecycle-e2e.sh
Normal file
431
deployer/scripts/verify-host-lifecycle-e2e.sh
Normal file
|
|
@ -0,0 +1,431 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||||
|
|
||||||
|
if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then
|
||||||
|
exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@"
|
||||||
|
fi
|
||||||
|
|
||||||
|
run_chainfire_server_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_CHAINFIRE_SERVER_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
run_deployer_ctl_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_DEPLOYER_CTL_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_DEPLOYER_CTL_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
run_plasmacloud_reconciler_bin() {
|
||||||
|
if [[ -n "${PHOTONCLOUD_PLASMACLOUD_RECONCILER_BIN:-}" ]]; then
|
||||||
|
"$PHOTONCLOUD_PLASMACLOUD_RECONCILER_BIN" "$@"
|
||||||
|
else
|
||||||
|
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p plasmacloud-reconciler -- "$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp_dir="$(mktemp -d)"
|
||||||
|
cf_pid=""
|
||||||
|
redfish_pid=""
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
set +e
|
||||||
|
if [[ -n "$redfish_pid" ]]; then
|
||||||
|
kill "$redfish_pid" 2>/dev/null || true
|
||||||
|
wait "$redfish_pid" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
if [[ -n "$cf_pid" ]]; then
|
||||||
|
kill "$cf_pid" 2>/dev/null || true
|
||||||
|
wait "$cf_pid" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
rm -rf "$tmp_dir"
|
||||||
|
}
|
||||||
|
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
free_port() {
|
||||||
|
python3 - <<'PY'
|
||||||
|
import socket
|
||||||
|
s = socket.socket()
|
||||||
|
s.bind(("127.0.0.1", 0))
|
||||||
|
print(s.getsockname()[1])
|
||||||
|
s.close()
|
||||||
|
PY
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_port() {
|
||||||
|
local host="$1"
|
||||||
|
local port="$2"
|
||||||
|
local timeout_secs="${3:-60}"
|
||||||
|
local deadline=$((SECONDS + timeout_secs))
|
||||||
|
|
||||||
|
while (( SECONDS < deadline )); do
|
||||||
|
if python3 - "$host" "$port" <<'PY'
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
|
||||||
|
host = sys.argv[1]
|
||||||
|
port = int(sys.argv[2])
|
||||||
|
|
||||||
|
with socket.socket() as sock:
|
||||||
|
sock.settimeout(0.5)
|
||||||
|
try:
|
||||||
|
sock.connect((host, port))
|
||||||
|
except OSError:
|
||||||
|
raise SystemExit(1)
|
||||||
|
raise SystemExit(0)
|
||||||
|
PY
|
||||||
|
then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "timed out waiting for ${host}:${port}" >&2
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
api_port="$(free_port)"
|
||||||
|
http_port="$(free_port)"
|
||||||
|
raft_port="$(free_port)"
|
||||||
|
gossip_port="$(free_port)"
|
||||||
|
redfish_port="$(free_port)"
|
||||||
|
|
||||||
|
cat >"$tmp_dir/chainfire.toml" <<EOF
|
||||||
|
[node]
|
||||||
|
id = 1
|
||||||
|
name = "chainfire-1"
|
||||||
|
role = "control_plane"
|
||||||
|
|
||||||
|
[storage]
|
||||||
|
data_dir = "$tmp_dir/chainfire-data"
|
||||||
|
|
||||||
|
[network]
|
||||||
|
api_addr = "127.0.0.1:${api_port}"
|
||||||
|
http_addr = "127.0.0.1:${http_port}"
|
||||||
|
raft_addr = "127.0.0.1:${raft_port}"
|
||||||
|
gossip_addr = "127.0.0.1:${gossip_port}"
|
||||||
|
|
||||||
|
[cluster]
|
||||||
|
id = 1
|
||||||
|
initial_members = []
|
||||||
|
bootstrap = true
|
||||||
|
|
||||||
|
[raft]
|
||||||
|
role = "voter"
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat >"$tmp_dir/mock-redfish.py" <<'PY'
|
||||||
|
import http.server
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
port = int(sys.argv[1])
|
||||||
|
log_path = sys.argv[2]
|
||||||
|
|
||||||
|
class Handler(http.server.BaseHTTPRequestHandler):
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
if self.path == "/redfish/v1/Systems/node01":
|
||||||
|
body = json.dumps({"PowerState": "On"}).encode("utf-8")
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "application/json")
|
||||||
|
self.send_header("Content-Length", str(len(body)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(body)
|
||||||
|
return
|
||||||
|
self.send_error(404)
|
||||||
|
|
||||||
|
def do_POST(self):
|
||||||
|
if self.path != "/redfish/v1/Systems/node01/Actions/ComputerSystem.Reset":
|
||||||
|
self.send_error(404)
|
||||||
|
return
|
||||||
|
length = int(self.headers.get("Content-Length", "0"))
|
||||||
|
payload = self.rfile.read(length).decode("utf-8")
|
||||||
|
with open(log_path, "a", encoding="utf-8") as handle:
|
||||||
|
handle.write(payload + "\n")
|
||||||
|
self.send_response(204)
|
||||||
|
self.end_headers()
|
||||||
|
|
||||||
|
server = http.server.ThreadingHTTPServer(("127.0.0.1", port), Handler)
|
||||||
|
server.serve_forever()
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Starting ChainFire on 127.0.0.1:${api_port}"
|
||||||
|
run_chainfire_server_bin --config "$tmp_dir/chainfire.toml" >"$tmp_dir/chainfire.log" 2>&1 &
|
||||||
|
cf_pid="$!"
|
||||||
|
wait_for_port "127.0.0.1" "$api_port" 120
|
||||||
|
wait_for_port "127.0.0.1" "$http_port" 120
|
||||||
|
|
||||||
|
echo "Starting mock Redfish on 127.0.0.1:${redfish_port}"
|
||||||
|
python3 "$tmp_dir/mock-redfish.py" "$redfish_port" "$tmp_dir/redfish.log" >"$tmp_dir/redfish.stdout" 2>&1 &
|
||||||
|
redfish_pid="$!"
|
||||||
|
wait_for_port "127.0.0.1" "$redfish_port" 30
|
||||||
|
|
||||||
|
cat >"$tmp_dir/cluster.yaml" <<EOF
|
||||||
|
cluster:
|
||||||
|
cluster_id: test-cluster
|
||||||
|
environment: dev
|
||||||
|
|
||||||
|
node_classes:
|
||||||
|
- name: worker-linux
|
||||||
|
roles:
|
||||||
|
- worker
|
||||||
|
labels:
|
||||||
|
tier: general
|
||||||
|
|
||||||
|
pools:
|
||||||
|
- name: general
|
||||||
|
node_class: worker-linux
|
||||||
|
labels:
|
||||||
|
env: dev
|
||||||
|
|
||||||
|
nodes:
|
||||||
|
- node_id: node01
|
||||||
|
hostname: node01
|
||||||
|
ip: 10.0.0.11
|
||||||
|
roles:
|
||||||
|
- worker
|
||||||
|
labels:
|
||||||
|
tier: general
|
||||||
|
pool: general
|
||||||
|
node_class: worker-linux
|
||||||
|
state: active
|
||||||
|
commission_state: commissioned
|
||||||
|
install_state: installed
|
||||||
|
bmc_ref: "redfish+http://127.0.0.1:${redfish_port}/redfish/v1/Systems/node01"
|
||||||
|
- node_id: node02
|
||||||
|
hostname: node02
|
||||||
|
ip: 10.0.0.12
|
||||||
|
roles:
|
||||||
|
- worker
|
||||||
|
labels:
|
||||||
|
tier: general
|
||||||
|
pool: general
|
||||||
|
node_class: worker-linux
|
||||||
|
state: active
|
||||||
|
commission_state: commissioned
|
||||||
|
install_state: installed
|
||||||
|
|
||||||
|
host_deployments:
|
||||||
|
- name: worker-rollout
|
||||||
|
selector:
|
||||||
|
roles:
|
||||||
|
- worker
|
||||||
|
pools:
|
||||||
|
- general
|
||||||
|
node_classes:
|
||||||
|
- worker-linux
|
||||||
|
match_labels:
|
||||||
|
tier: general
|
||||||
|
nixos_configuration: worker-next
|
||||||
|
flake_ref: "github:centra/cloud"
|
||||||
|
batch_size: 1
|
||||||
|
max_unavailable: 1
|
||||||
|
health_check_command:
|
||||||
|
- "true"
|
||||||
|
switch_action: switch
|
||||||
|
rollback_on_failure: true
|
||||||
|
EOF
|
||||||
|
|
||||||
|
chainfire_endpoint="http://127.0.0.1:${api_port}"
|
||||||
|
|
||||||
|
run_deployer_ctl() {
|
||||||
|
run_deployer_ctl_bin \
|
||||||
|
--chainfire-endpoint "$chainfire_endpoint" \
|
||||||
|
--cluster-id test-cluster \
|
||||||
|
--cluster-namespace photoncloud \
|
||||||
|
--deployer-namespace deployer \
|
||||||
|
"$@"
|
||||||
|
}
|
||||||
|
|
||||||
|
run_hosts_once() {
|
||||||
|
run_plasmacloud_reconciler_bin \
|
||||||
|
hosts \
|
||||||
|
--endpoint "$chainfire_endpoint" \
|
||||||
|
--cluster-namespace photoncloud \
|
||||||
|
--cluster-id test-cluster \
|
||||||
|
--heartbeat-timeout-secs 300 \
|
||||||
|
--once
|
||||||
|
}
|
||||||
|
|
||||||
|
echo "Applying host lifecycle cluster config"
|
||||||
|
run_deployer_ctl apply --config "$tmp_dir/cluster.yaml" --prune
|
||||||
|
|
||||||
|
echo "Running host rollout controller"
|
||||||
|
run_hosts_once
|
||||||
|
|
||||||
|
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-1.json"
|
||||||
|
python3 - "$tmp_dir/deployment-1.json" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
status = payload["status"]
|
||||||
|
assert status["phase"] == "running", payload
|
||||||
|
assert status["in_progress_nodes"] == ["node01"], payload
|
||||||
|
assert status["failed_nodes"] == [], payload
|
||||||
|
print("initial rollout wave validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-1.dump"
|
||||||
|
python3 - "$tmp_dir/nodes-1.dump" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
desired = {}
|
||||||
|
with open(sys.argv[1], "r", encoding="utf-8") as handle:
|
||||||
|
for line in handle:
|
||||||
|
if " key=" not in line or " value=" not in line:
|
||||||
|
continue
|
||||||
|
key = line.split(" key=", 1)[1].split(" value=", 1)[0]
|
||||||
|
if not key.endswith("/desired-system"):
|
||||||
|
continue
|
||||||
|
payload = json.loads(line.split(" value=", 1)[1])
|
||||||
|
desired[payload["node_id"]] = payload
|
||||||
|
|
||||||
|
assert sorted(desired) == ["node01"], desired
|
||||||
|
assert desired["node01"]["deployment_id"] == "worker-rollout", desired
|
||||||
|
print("desired-system first wave validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Pausing and resuming deployment via CLI"
|
||||||
|
run_deployer_ctl deployment pause --name worker-rollout >"$tmp_dir/pause.json"
|
||||||
|
python3 - "$tmp_dir/pause.json" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
assert payload["paused"] is True, payload
|
||||||
|
assert payload["paused_by_operator"] is True, payload
|
||||||
|
print("pause command validated")
|
||||||
|
PY
|
||||||
|
run_deployer_ctl deployment resume --name worker-rollout >"$tmp_dir/resume.json"
|
||||||
|
python3 - "$tmp_dir/resume.json" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
assert payload["paused"] is False, payload
|
||||||
|
assert payload["paused_by_operator"] is False, payload
|
||||||
|
print("resume command validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Marking node01 rollout complete and reconciling next wave"
|
||||||
|
run_deployer_ctl node set-observed \
|
||||||
|
--node-id node01 \
|
||||||
|
--status active \
|
||||||
|
--nixos-configuration worker-next >/dev/null
|
||||||
|
run_hosts_once
|
||||||
|
|
||||||
|
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-2.json"
|
||||||
|
python3 - "$tmp_dir/deployment-2.json" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
status = payload["status"]
|
||||||
|
assert status["completed_nodes"] == ["node01"], payload
|
||||||
|
assert status["in_progress_nodes"] == ["node02"], payload
|
||||||
|
print("second rollout wave validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Marking node02 rollout failed and validating auto-pause"
|
||||||
|
run_deployer_ctl node set-observed \
|
||||||
|
--node-id node02 \
|
||||||
|
--status rolled-back \
|
||||||
|
--nixos-configuration worker-next >/dev/null
|
||||||
|
run_hosts_once
|
||||||
|
|
||||||
|
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-3.json"
|
||||||
|
python3 - "$tmp_dir/deployment-3.json" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
status = payload["status"]
|
||||||
|
assert status["paused"] is True, payload
|
||||||
|
assert status["failed_nodes"] == ["node02"], payload
|
||||||
|
print("auto-pause on failure validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Refreshing power state through Redfish"
|
||||||
|
run_deployer_ctl node power --node-id node01 --action refresh >"$tmp_dir/node-power.json"
|
||||||
|
python3 - "$tmp_dir/node-power.json" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
assert payload["power_state"] == "on", payload
|
||||||
|
print("power refresh validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Requesting reinstall with power cycle"
|
||||||
|
run_deployer_ctl node reinstall --node-id node01 --power-cycle >"$tmp_dir/node-reinstall.json"
|
||||||
|
python3 - "$tmp_dir/node-reinstall.json" "$tmp_dir/redfish.log" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
assert payload["state"] == "provisioning", payload
|
||||||
|
assert payload["install_state"] == "reinstall_requested", payload
|
||||||
|
assert payload["power_state"] == "cycling", payload
|
||||||
|
|
||||||
|
lines = [line.strip() for line in open(sys.argv[2], "r", encoding="utf-8") if line.strip()]
|
||||||
|
assert any('"ResetType":"PowerCycle"' in line for line in lines), lines
|
||||||
|
print("reinstall orchestration validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/node01" >"$tmp_dir/node01-post-reinstall.dump"
|
||||||
|
python3 - "$tmp_dir/node01-post-reinstall.dump" <<'PY'
|
||||||
|
import sys
|
||||||
|
|
||||||
|
lines = [line.strip() for line in open(sys.argv[1], "r", encoding="utf-8")]
|
||||||
|
assert not any("/desired-system" in line for line in lines), lines
|
||||||
|
assert not any("/observed-system" in line for line in lines), lines
|
||||||
|
print("reinstall state cleanup validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Aborting deployment and clearing desired-system"
|
||||||
|
run_deployer_ctl deployment abort --name worker-rollout >"$tmp_dir/abort.json"
|
||||||
|
python3 - "$tmp_dir/abort.json" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
assert payload["phase"] == "aborted", payload
|
||||||
|
assert payload["paused"] is True, payload
|
||||||
|
print("abort command validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-2.dump"
|
||||||
|
python3 - "$tmp_dir/nodes-2.dump" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
desired_nodes = []
|
||||||
|
with open(sys.argv[1], "r", encoding="utf-8") as handle:
|
||||||
|
for line in handle:
|
||||||
|
if " key=" not in line or " value=" not in line:
|
||||||
|
continue
|
||||||
|
key = line.split(" key=", 1)[1].split(" value=", 1)[0]
|
||||||
|
if not key.endswith("/desired-system"):
|
||||||
|
continue
|
||||||
|
payload = json.loads(line.split(" value=", 1)[1])
|
||||||
|
if payload.get("deployment_id") == "worker-rollout":
|
||||||
|
desired_nodes.append(payload["node_id"])
|
||||||
|
|
||||||
|
assert desired_nodes == [], desired_nodes
|
||||||
|
print("desired-system cleanup validated")
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "Host lifecycle E2E verification passed"
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
# Storage Benchmarks
|
# Storage Benchmarks
|
||||||
|
|
||||||
Generated on 2026-03-10T20:02:00+09:00 with:
|
Generated on 2026-03-27T12:08:47+09:00 with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-bench-storage
|
nix run ./nix/test-cluster#cluster -- bench-storage
|
||||||
```
|
```
|
||||||
|
|
||||||
## CoronaFS
|
## CoronaFS
|
||||||
|
|
@ -12,30 +12,35 @@ Cluster network baseline, measured with `iperf3` from `node04` to `node01` befor
|
||||||
|
|
||||||
| Metric | Result |
|
| Metric | Result |
|
||||||
|---|---:|
|
|---|---:|
|
||||||
| TCP throughput | 22.83 MiB/s |
|
| TCP throughput | 45.92 MiB/s |
|
||||||
| TCP retransmits | 78 |
|
| TCP retransmits | 193 |
|
||||||
|
|
||||||
Measured from `node04`.
|
Measured from `node04`.
|
||||||
Local worker disk is the baseline. CoronaFS is the shared block volume path used for mutable VM disks, exported from `node01` over NBD.
|
Local worker disk is the baseline. CoronaFS now has two relevant data paths in the lab: the controller export sourced from `node01`, and the node-local export materialized onto the worker that actually attaches the mutable VM disk.
|
||||||
|
|
||||||
| Metric | Local Disk | CoronaFS |
|
| Metric | Local Disk | Controller Export | Node-local Export |
|
||||||
|---|---:|---:|
|
|---|---:|---:|---:|
|
||||||
| Sequential write | 26.36 MiB/s | 5.24 MiB/s |
|
| Sequential write | 679.05 MiB/s | 30.35 MiB/s | 395.06 MiB/s |
|
||||||
| Sequential read | 348.77 MiB/s | 10.08 MiB/s |
|
| Sequential read | 2723.40 MiB/s | 42.70 MiB/s | 709.14 MiB/s |
|
||||||
| 4k random read | 1243 IOPS | 145 IOPS |
|
| 4k random read | 16958 IOPS | 2034 IOPS | 5087 IOPS |
|
||||||
|
| 4k queued random read (`iodepth=32`) | 106026 IOPS | 14261 IOPS | 28898 IOPS |
|
||||||
|
|
||||||
Queue-depth profile (`libaio`, `iodepth=32`) from the same worker:
|
Queue-depth profile (`libaio`, `iodepth=32`) from the same worker:
|
||||||
|
|
||||||
| Metric | Local Disk | CoronaFS |
|
| Metric | Local Disk | Controller Export | Node-local Export |
|
||||||
|---|---:|---:|
|
|---|---:|---:|---:|
|
||||||
| Depth-32 write | 27.12 MiB/s | 11.42 MiB/s |
|
| Depth-32 write | 3417.45 MiB/s | 39.26 MiB/s | 178.04 MiB/s |
|
||||||
| Depth-32 read | 4797.47 MiB/s | 10.06 MiB/s |
|
| Depth-32 read | 12996.47 MiB/s | 55.71 MiB/s | 112.88 MiB/s |
|
||||||
|
|
||||||
Cross-worker shared-volume visibility, measured by writing on `node04` and reading from `node05` over the same CoronaFS NBD export:
|
Node-local materialization timing and target-node steady-state read path:
|
||||||
|
|
||||||
| Metric | Result |
|
| Metric | Result |
|
||||||
|---|---:|
|
|---|---:|
|
||||||
| Cross-worker sequential read | 17.72 MiB/s |
|
| Node04 materialize latency | 9.23 s |
|
||||||
|
| Node05 materialize latency | 5.82 s |
|
||||||
|
| Node05 node-local sequential read | 709.14 MiB/s |
|
||||||
|
|
||||||
|
PlasmaVMC now prefers the worker-local CoronaFS export for mutable node-local volumes, even when the underlying materialization is a qcow2 overlay. The VM runtime section below is therefore the closest end-to-end proxy for real local-attach VM I/O, while the node-local export numbers remain useful for CoronaFS service consumers and for diagnosing exporter overhead.
|
||||||
|
|
||||||
## LightningStor
|
## LightningStor
|
||||||
|
|
||||||
|
|
@ -46,16 +51,16 @@ Cluster network baseline for this client, measured with `iperf3` from `node03` t
|
||||||
|
|
||||||
| Metric | Result |
|
| Metric | Result |
|
||||||
|---|---:|
|
|---|---:|
|
||||||
| TCP throughput | 18.35 MiB/s |
|
| TCP throughput | 45.99 MiB/s |
|
||||||
| TCP retransmits | 78 |
|
| TCP retransmits | 207 |
|
||||||
|
|
||||||
### Large-object path
|
### Large-object path
|
||||||
|
|
||||||
| Metric | Result |
|
| Metric | Result |
|
||||||
|---|---:|
|
|---|---:|
|
||||||
| Object size | 256 MiB |
|
| Object size | 256 MiB |
|
||||||
| Upload throughput | 8.11 MiB/s |
|
| Upload throughput | 18.20 MiB/s |
|
||||||
| Download throughput | 7.54 MiB/s |
|
| Download throughput | 39.21 MiB/s |
|
||||||
|
|
||||||
### Small-object batch
|
### Small-object batch
|
||||||
|
|
||||||
|
|
@ -63,10 +68,10 @@ Measured as 32 objects of 4 MiB each (128 MiB total).
|
||||||
|
|
||||||
| Metric | Result |
|
| Metric | Result |
|
||||||
|---|---:|
|
|---|---:|
|
||||||
| Batch upload throughput | 0.81 MiB/s |
|
| Batch upload throughput | 18.96 MiB/s |
|
||||||
| Batch download throughput | 0.83 MiB/s |
|
| Batch download throughput | 39.88 MiB/s |
|
||||||
| PUT rate | 0.20 objects/s |
|
| PUT rate | 4.74 objects/s |
|
||||||
| GET rate | 0.21 objects/s |
|
| GET rate | 9.97 objects/s |
|
||||||
|
|
||||||
### Parallel small-object batch
|
### Parallel small-object batch
|
||||||
|
|
||||||
|
|
@ -74,34 +79,57 @@ Measured as the same 32 objects of 4 MiB each, but with 8 concurrent client jobs
|
||||||
|
|
||||||
| Metric | Result |
|
| Metric | Result |
|
||||||
|---|---:|
|
|---|---:|
|
||||||
| Parallel batch upload throughput | 3.03 MiB/s |
|
| Parallel batch upload throughput | 16.23 MiB/s |
|
||||||
| Parallel batch download throughput | 2.89 MiB/s |
|
| Parallel batch download throughput | 26.07 MiB/s |
|
||||||
| Parallel PUT rate | 0.76 objects/s |
|
| Parallel PUT rate | 4.06 objects/s |
|
||||||
| Parallel GET rate | 0.72 objects/s |
|
| Parallel GET rate | 6.52 objects/s |
|
||||||
|
|
||||||
## VM Image Path
|
## VM Image Path
|
||||||
|
|
||||||
Measured against the real `PlasmaVMC -> LightningStor artifact -> CoronaFS-backed managed volume` path on `node01`.
|
Measured against the `PlasmaVMC -> LightningStor artifact -> CoronaFS-backed managed volume` clone path on `node01`.
|
||||||
|
|
||||||
| Metric | Result |
|
| Metric | Result |
|
||||||
|---|---:|
|
|---|---:|
|
||||||
| Guest image artifact size | 2017 MiB |
|
| Guest image artifact size | 2017 MiB |
|
||||||
| Guest image virtual size | 4096 MiB |
|
| Guest image virtual size | 4096 MiB |
|
||||||
| `CreateImage` latency | 176.03 s |
|
| `CreateImage` latency | 66.49 s |
|
||||||
| First image-backed `CreateVolume` latency | 76.51 s |
|
| First image-backed `CreateVolume` latency | 16.86 s |
|
||||||
| Second image-backed `CreateVolume` latency | 170.49 s |
|
| Second image-backed `CreateVolume` latency | 0.12 s |
|
||||||
|
|
||||||
|
## VM Runtime Path
|
||||||
|
|
||||||
|
Measured against the real `StartVm -> qemu attach -> guest boot -> guest fio` path on a worker node, using a CoronaFS-backed root disk and data disk.
|
||||||
|
|
||||||
|
| Metric | Result |
|
||||||
|
|---|---:|
|
||||||
|
| `StartVm` to qemu attach | 0.60 s |
|
||||||
|
| `StartVm` to guest benchmark result | 35.69 s |
|
||||||
|
| Guest sequential write | 123.49252223968506 MiB/s |
|
||||||
|
| Guest sequential read | 1492.7113695144653 MiB/s |
|
||||||
|
| Guest 4k random read | 25550 IOPS |
|
||||||
|
|
||||||
## Assessment
|
## Assessment
|
||||||
|
|
||||||
- CoronaFS shared-volume reads are currently 2.9% of the measured local-disk baseline on this nested-QEMU lab cluster.
|
- CoronaFS controller-export reads are currently 1.6% of the measured local-disk baseline on this nested-QEMU lab cluster.
|
||||||
- CoronaFS 4k random reads are currently 11.7% of the measured local-disk baseline.
|
- CoronaFS controller-export 4k random reads are currently 12.0% of the measured local-disk baseline.
|
||||||
- CoronaFS cross-worker reads are currently 5.1% of the measured local-disk sequential-read baseline, which is the more relevant signal for VM restart and migration paths.
|
- CoronaFS controller-export queued 4k random reads are currently 13.5% of the measured local queued-random-read baseline.
|
||||||
- CoronaFS sequential reads are currently 44.2% of the measured node04->node01 TCP baseline, which helps separate NBD/export overhead from raw cluster-network limits.
|
- CoronaFS controller-export sequential reads are currently 93.0% of the measured node04->node01 TCP baseline, which isolates the centralized source path from raw cluster-network limits.
|
||||||
- CoronaFS depth-32 reads are currently 0.2% of the local depth-32 baseline, which is a better proxy for queued guest I/O than the single-depth path.
|
- CoronaFS controller-export depth-32 reads are currently 0.4% of the local depth-32 baseline.
|
||||||
- The shared-volume path is functionally correct for mutable VM disks and migration tests, but its read-side throughput is still too low to call production-ready for heavier VM workloads.
|
- CoronaFS node-local reads are currently 26.0% of the measured local-disk baseline, which is the more relevant steady-state signal for mutable VM disks after attachment.
|
||||||
- LightningStor's replicated S3 path is working correctly, but 8.11 MiB/s upload and 7.54 MiB/s download are still lab-grade numbers rather than strong object-store throughput.
|
- CoronaFS node-local 4k random reads are currently 30.0% of the measured local-disk baseline.
|
||||||
- LightningStor large-object downloads are currently 41.1% of the same node04->node01 TCP baseline, which indicates how much of the headroom is being lost above the raw network path.
|
- CoronaFS node-local queued 4k random reads are currently 27.3% of the measured local queued-random-read baseline.
|
||||||
- LightningStor's small-object batch path is also functional, but 0.20 PUT/s and 0.21 GET/s still indicate a lab cluster rather than a tuned object-storage deployment.
|
- CoronaFS node-local depth-32 reads are currently 0.9% of the local depth-32 baseline.
|
||||||
- The parallel small-object profile is the more relevant control-plane/object-ingest signal; it currently reaches 0.76 PUT/s and 0.72 GET/s.
|
- The target worker's node-local read path is 26.0% of the measured local sequential-read baseline after materialization, which is the better proxy for restart and migration steady state than the old shared-export read.
|
||||||
- The VM image path is now measured directly rather than inferred. The cold `CreateVolume` path includes artifact fetch plus CoronaFS population; the warm `CreateVolume` path isolates repeated CoronaFS population from an already cached image.
|
- PlasmaVMC now attaches writable node-local volumes through the worker-local CoronaFS export, so the guest-runtime section should be treated as the real local VM steady-state path rather than the node-local export numbers alone.
|
||||||
|
- CoronaFS single-depth writes remain sensitive to the nested-QEMU/VDE lab transport, so the queued-depth and guest-runtime numbers are still the more reliable proxy for real VM workload behavior than the single-stream write figure alone.
|
||||||
|
- The central export path is now best understood as a source/materialization path; the worker-local export is the path that should determine VM-disk readiness going forward.
|
||||||
|
- LightningStor's replicated S3 path is working correctly, but 18.20 MiB/s upload and 39.21 MiB/s download are still lab-grade numbers rather than strong object-store throughput.
|
||||||
|
- LightningStor large-object downloads are currently 85.3% of the same node04->node01 TCP baseline, which indicates how much of the headroom is being lost above the raw network path.
|
||||||
|
- The current S3 frontend tuning baseline is the built-in 16 MiB streaming threshold with multipart PUT/FETCH concurrency of 8; that combination is the best default observed on this lab cluster so far.
|
||||||
|
- LightningStor uploads should be read against the replication write quorum and the same ~45.99 MiB/s lab network ceiling; this environment still limits end-to-end throughput well before modern bare-metal NICs would.
|
||||||
|
- LightningStor's small-object batch path is also functional, but 4.74 PUT/s and 9.97 GET/s still indicate a lab cluster rather than a tuned object-storage deployment.
|
||||||
|
- The parallel small-object profile is the more relevant control-plane/object-ingest signal; it currently reaches 4.06 PUT/s and 6.52 GET/s.
|
||||||
|
- The VM image section measures clone/materialization cost, not guest runtime I/O.
|
||||||
|
- The PlasmaVMC local image-backed clone fast path is now active again; a 0.12 s second clone indicates the CoronaFS qcow2 backing-file path is being hit on node01 rather than falling back to eager raw materialization.
|
||||||
|
- The VM runtime section is the real `PlasmaVMC + CoronaFS + QEMU virtio-blk + guest kernel` path; use it to judge whether QEMU/NBD tuning is helping.
|
||||||
- The local sequential-write baseline is noisy in this environment, so the read and random-read deltas are the more reliable signal.
|
- The local sequential-write baseline is noisy in this environment, so the read and random-read deltas are the more reliable signal.
|
||||||
|
|
|
||||||
574
fiberlb/Cargo.lock
generated
574
fiberlb/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
17
flake.lock
generated
17
flake.lock
generated
|
|
@ -76,7 +76,8 @@
|
||||||
"flake-utils": "flake-utils",
|
"flake-utils": "flake-utils",
|
||||||
"nix-nos": "nix-nos",
|
"nix-nos": "nix-nos",
|
||||||
"nixpkgs": "nixpkgs",
|
"nixpkgs": "nixpkgs",
|
||||||
"rust-overlay": "rust-overlay"
|
"rust-overlay": "rust-overlay",
|
||||||
|
"systems": "systems_2"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"rust-overlay": {
|
"rust-overlay": {
|
||||||
|
|
@ -113,6 +114,20 @@
|
||||||
"repo": "default",
|
"repo": "default",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"systems_2": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"id": "systems",
|
||||||
|
"type": "indirect"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": "root",
|
"root": "root",
|
||||||
|
|
|
||||||
467
flake.nix
467
flake.nix
|
|
@ -33,7 +33,7 @@
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# OUTPUTS: What this flake provides
|
# OUTPUTS: What this flake provides
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos }:
|
outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems ? null }:
|
||||||
flake-utils.lib.eachDefaultSystem (system:
|
flake-utils.lib.eachDefaultSystem (system:
|
||||||
let
|
let
|
||||||
# Apply rust-overlay to get rust-bin attribute
|
# Apply rust-overlay to get rust-bin attribute
|
||||||
|
|
@ -139,6 +139,301 @@
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
flakeInputsBlock = ''
|
||||||
|
inputs = {
|
||||||
|
# Use unstable nixpkgs for latest packages
|
||||||
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
|
|
||||||
|
# Rust overlay for managing Rust toolchains
|
||||||
|
rust-overlay = {
|
||||||
|
url = "github:oxalica/rust-overlay";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Flake utilities for multi-system support
|
||||||
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
|
|
||||||
|
# Disko for declarative disk partitioning
|
||||||
|
disko = {
|
||||||
|
url = "github:nix-community/disko";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Nix-NOS generic network operating system modules
|
||||||
|
nix-nos = {
|
||||||
|
url = "path:./nix-nos";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
'';
|
||||||
|
|
||||||
|
bundledInputsBlock = ''
|
||||||
|
inputs = {
|
||||||
|
nixpkgs.url = "path:./.bundle-inputs/nixpkgs";
|
||||||
|
|
||||||
|
rust-overlay = {
|
||||||
|
url = "path:./.bundle-inputs/rust-overlay";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
flake-utils = {
|
||||||
|
url = "path:./.bundle-inputs/flake-utils";
|
||||||
|
inputs.systems.follows = "systems";
|
||||||
|
};
|
||||||
|
|
||||||
|
systems.url = "path:./.bundle-inputs/systems";
|
||||||
|
|
||||||
|
disko = {
|
||||||
|
url = "path:./.bundle-inputs/disko";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
nix-nos = {
|
||||||
|
url = "path:./nix-nos";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
'';
|
||||||
|
|
||||||
|
flakeHeaderBlock = ''
|
||||||
|
# ============================================================================
|
||||||
|
# INPUTS: External dependencies
|
||||||
|
# ============================================================================
|
||||||
|
inputs = {
|
||||||
|
# Use unstable nixpkgs for latest packages
|
||||||
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
|
|
||||||
|
# Rust overlay for managing Rust toolchains
|
||||||
|
rust-overlay = {
|
||||||
|
url = "github:oxalica/rust-overlay";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Flake utilities for multi-system support
|
||||||
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
|
|
||||||
|
# Disko for declarative disk partitioning
|
||||||
|
disko = {
|
||||||
|
url = "github:nix-community/disko";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Nix-NOS generic network operating system modules
|
||||||
|
nix-nos = {
|
||||||
|
url = "path:./nix-nos";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# OUTPUTS: What this flake provides
|
||||||
|
# ============================================================================
|
||||||
|
outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems ? null }:
|
||||||
|
'';
|
||||||
|
|
||||||
|
bundledHeaderBlock = ''
|
||||||
|
# ============================================================================
|
||||||
|
# INPUTS: External dependencies
|
||||||
|
# ============================================================================
|
||||||
|
inputs = {
|
||||||
|
nixpkgs.url = "path:./.bundle-inputs/nixpkgs";
|
||||||
|
|
||||||
|
rust-overlay = {
|
||||||
|
url = "path:./.bundle-inputs/rust-overlay";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
flake-utils = {
|
||||||
|
url = "path:./.bundle-inputs/flake-utils";
|
||||||
|
inputs.systems.follows = "systems";
|
||||||
|
};
|
||||||
|
|
||||||
|
systems.url = "path:./.bundle-inputs/systems";
|
||||||
|
|
||||||
|
disko = {
|
||||||
|
url = "path:./.bundle-inputs/disko";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
|
||||||
|
nix-nos = {
|
||||||
|
url = "path:./nix-nos";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# OUTPUTS: What this flake provides
|
||||||
|
# ============================================================================
|
||||||
|
outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems ? null }:
|
||||||
|
'';
|
||||||
|
|
||||||
|
bundledFlakeNix =
|
||||||
|
pkgs.writeText
|
||||||
|
"plasmacloud-bundled-flake.nix"
|
||||||
|
(
|
||||||
|
builtins.replaceStrings
|
||||||
|
[ flakeHeaderBlock ]
|
||||||
|
[ bundledHeaderBlock ]
|
||||||
|
(builtins.readFile ./flake.nix)
|
||||||
|
);
|
||||||
|
|
||||||
|
bundledFlakeHeaderFile =
|
||||||
|
pkgs.writeText "plasmacloud-bundled-flake-header" bundledHeaderBlock;
|
||||||
|
|
||||||
|
baseFlakeLock = builtins.fromJSON (builtins.readFile ./flake.lock);
|
||||||
|
|
||||||
|
bundleInputRelPaths = {
|
||||||
|
nixpkgs = "./.bundle-inputs/nixpkgs";
|
||||||
|
"rust-overlay" = "./.bundle-inputs/rust-overlay";
|
||||||
|
"flake-utils" = "./.bundle-inputs/flake-utils";
|
||||||
|
disko = "./.bundle-inputs/disko";
|
||||||
|
systems = "./.bundle-inputs/systems";
|
||||||
|
};
|
||||||
|
|
||||||
|
fetchLockedInput =
|
||||||
|
nodeName:
|
||||||
|
let
|
||||||
|
tree = builtins.fetchTree baseFlakeLock.nodes.${nodeName}.locked;
|
||||||
|
in
|
||||||
|
if builtins.isAttrs tree && tree ? outPath then tree.outPath else tree;
|
||||||
|
|
||||||
|
vendoredFlakeInputs = {
|
||||||
|
nixpkgs = fetchLockedInput "nixpkgs";
|
||||||
|
"rust-overlay" = fetchLockedInput "rust-overlay";
|
||||||
|
"flake-utils" = fetchLockedInput "flake-utils";
|
||||||
|
disko = fetchLockedInput "disko";
|
||||||
|
systems = fetchLockedInput "systems";
|
||||||
|
};
|
||||||
|
|
||||||
|
makeBundledLockNode =
|
||||||
|
nodeName: relPath:
|
||||||
|
let
|
||||||
|
node = baseFlakeLock.nodes.${nodeName};
|
||||||
|
in
|
||||||
|
node
|
||||||
|
// {
|
||||||
|
locked = {
|
||||||
|
type = "path";
|
||||||
|
path = relPath;
|
||||||
|
};
|
||||||
|
original = {
|
||||||
|
type = "path";
|
||||||
|
path = relPath;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
bundledFlakeLock = baseFlakeLock // {
|
||||||
|
nodes =
|
||||||
|
baseFlakeLock.nodes
|
||||||
|
// {
|
||||||
|
root =
|
||||||
|
baseFlakeLock.nodes.root
|
||||||
|
// {
|
||||||
|
inputs =
|
||||||
|
baseFlakeLock.nodes.root.inputs
|
||||||
|
// {
|
||||||
|
systems = "systems";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
nixpkgs = makeBundledLockNode "nixpkgs" bundleInputRelPaths.nixpkgs;
|
||||||
|
"rust-overlay" = makeBundledLockNode "rust-overlay" bundleInputRelPaths."rust-overlay";
|
||||||
|
"flake-utils" = makeBundledLockNode "flake-utils" bundleInputRelPaths."flake-utils";
|
||||||
|
disko = makeBundledLockNode "disko" bundleInputRelPaths.disko;
|
||||||
|
systems = makeBundledLockNode "systems" bundleInputRelPaths.systems;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
bundledFlakeLockFile =
|
||||||
|
pkgs.writeText "plasmacloud-bundled-flake.lock" (builtins.toJSON bundledFlakeLock);
|
||||||
|
|
||||||
|
inBundledEval = builtins.pathExists ./.bundle-eval-marker;
|
||||||
|
|
||||||
|
bundledFlakeRootDrv = pkgs.runCommand "plasmacloud-bundled-flake-root" {
|
||||||
|
nativeBuildInputs = [
|
||||||
|
pkgs.coreutils
|
||||||
|
pkgs.python3
|
||||||
|
];
|
||||||
|
} ''
|
||||||
|
mkdir -p "$out"
|
||||||
|
cp -a ${flakeBundleSrc}/. "$out"/
|
||||||
|
chmod -R u+w "$out"
|
||||||
|
touch "$out/.bundle-eval-marker"
|
||||||
|
mkdir -p "$out/.bundle-inputs"
|
||||||
|
cp -a ${vendoredFlakeInputs.nixpkgs} "$out/.bundle-inputs/nixpkgs"
|
||||||
|
cp -a ${vendoredFlakeInputs."rust-overlay"} "$out/.bundle-inputs/rust-overlay"
|
||||||
|
cp -a ${vendoredFlakeInputs."flake-utils"} "$out/.bundle-inputs/flake-utils"
|
||||||
|
cp -a ${vendoredFlakeInputs.disko} "$out/.bundle-inputs/disko"
|
||||||
|
cp -a ${vendoredFlakeInputs.systems} "$out/.bundle-inputs/systems"
|
||||||
|
cp ${bundledFlakeLockFile} "$out/flake.lock"
|
||||||
|
python3 - <<'PY' "$out/flake.nix" ${bundledFlakeHeaderFile}
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
flake_path = Path(sys.argv[1])
|
||||||
|
header = Path(sys.argv[2]).read_text()
|
||||||
|
source = flake_path.read_text()
|
||||||
|
pattern = re.compile(
|
||||||
|
r" # ============================================================================\n"
|
||||||
|
r" # INPUTS: External dependencies\n"
|
||||||
|
r" # ============================================================================\n"
|
||||||
|
r" inputs = \{.*?\n"
|
||||||
|
r" # ============================================================================\n"
|
||||||
|
r" # OUTPUTS: What this flake provides\n"
|
||||||
|
r" # ============================================================================\n"
|
||||||
|
r" outputs = \{ self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems \? null \}:",
|
||||||
|
re.S,
|
||||||
|
)
|
||||||
|
rewritten, count = pattern.subn(header.rstrip("\n"), source, count=1)
|
||||||
|
if count != 1:
|
||||||
|
raise SystemExit(f"expected to rewrite 1 flake header, rewrote {count}")
|
||||||
|
flake_path.write_text(rewritten)
|
||||||
|
PY
|
||||||
|
'';
|
||||||
|
|
||||||
|
bundledFlakeRoot =
|
||||||
|
if inBundledEval then
|
||||||
|
null
|
||||||
|
else
|
||||||
|
builtins.path {
|
||||||
|
path = bundledFlakeRootDrv;
|
||||||
|
name = "plasmacloud-bundled-flake-root-src";
|
||||||
|
};
|
||||||
|
|
||||||
|
bundledFlakeRootNarHashFile =
|
||||||
|
if inBundledEval then
|
||||||
|
null
|
||||||
|
else
|
||||||
|
pkgs.runCommand "plasmacloud-bundled-flake-root-narhash" {
|
||||||
|
nativeBuildInputs = [ pkgs.nix ];
|
||||||
|
} ''
|
||||||
|
${pkgs.nix}/bin/nix \
|
||||||
|
--extra-experimental-features nix-command \
|
||||||
|
hash path --sri ${bundledFlakeRoot} \
|
||||||
|
| tr -d '\n' > "$out"
|
||||||
|
'';
|
||||||
|
|
||||||
|
bundledFlakeRootNarHash =
|
||||||
|
if inBundledEval then
|
||||||
|
null
|
||||||
|
else
|
||||||
|
builtins.readFile bundledFlakeRootNarHashFile;
|
||||||
|
|
||||||
|
bundledFlake =
|
||||||
|
if inBundledEval then
|
||||||
|
null
|
||||||
|
else
|
||||||
|
builtins.getFlake (
|
||||||
|
builtins.unsafeDiscardStringContext
|
||||||
|
"path:${toString bundledFlakeRoot}?narHash=${bundledFlakeRootNarHash}"
|
||||||
|
);
|
||||||
|
|
||||||
|
bundledVmSmokeTargetToplevel =
|
||||||
|
if inBundledEval then
|
||||||
|
null
|
||||||
|
else
|
||||||
|
bundledFlake.nixosConfigurations.vm-smoke-target.config.system.build.toplevel;
|
||||||
|
|
||||||
# Helper function to build a Rust workspace package
|
# Helper function to build a Rust workspace package
|
||||||
# Parameters:
|
# Parameters:
|
||||||
# name: package name (e.g., "chainfire-server")
|
# name: package name (e.g., "chainfire-server")
|
||||||
|
|
@ -434,16 +729,31 @@
|
||||||
description = "Node-local NixOS reconciliation agent for PhotonCloud hosts";
|
description = "Node-local NixOS reconciliation agent for PhotonCloud hosts";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
plasmacloud-reconciler = buildRustWorkspace {
|
||||||
|
name = "plasmacloud-reconciler";
|
||||||
|
workspaceSubdir = "deployer";
|
||||||
|
mainCrate = "plasmacloud-reconciler";
|
||||||
|
description = "Declarative reconciler for host rollouts and published resources";
|
||||||
|
};
|
||||||
|
|
||||||
plasmacloudFlakeBundle = pkgs.runCommand "plasmacloud-flake-bundle.tar.gz" {
|
plasmacloudFlakeBundle = pkgs.runCommand "plasmacloud-flake-bundle.tar.gz" {
|
||||||
nativeBuildInputs = [ pkgs.gnutar pkgs.gzip ];
|
nativeBuildInputs = [
|
||||||
|
pkgs.coreutils
|
||||||
|
pkgs.gnutar
|
||||||
|
pkgs.gzip
|
||||||
|
];
|
||||||
} ''
|
} ''
|
||||||
|
bundle_root="$(mktemp -d)"
|
||||||
|
cp -a ${bundledFlakeRootDrv}/. "$bundle_root"/
|
||||||
|
chmod -R u+w "$bundle_root"
|
||||||
|
|
||||||
tar \
|
tar \
|
||||||
--sort=name \
|
--sort=name \
|
||||||
--mtime='@1' \
|
--mtime='@1' \
|
||||||
--owner=0 \
|
--owner=0 \
|
||||||
--group=0 \
|
--group=0 \
|
||||||
--numeric-owner \
|
--numeric-owner \
|
||||||
-C ${flakeBundleSrc} \
|
-C "$bundle_root" \
|
||||||
-cf - . \
|
-cf - . \
|
||||||
| gzip -n > "$out"
|
| gzip -n > "$out"
|
||||||
'';
|
'';
|
||||||
|
|
@ -462,6 +772,7 @@
|
||||||
self.nixosConfigurations.node01.config.system.build.plasmacloudDeployerClusterState;
|
self.nixosConfigurations.node01.config.system.build.plasmacloudDeployerClusterState;
|
||||||
|
|
||||||
vmClusterFlakeBundle = self.packages.${system}.plasmacloudFlakeBundle;
|
vmClusterFlakeBundle = self.packages.${system}.plasmacloudFlakeBundle;
|
||||||
|
vmSmokeBundledTargetToplevel = bundledVmSmokeTargetToplevel;
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# Default package: Build all servers
|
# Default package: Build all servers
|
||||||
|
|
@ -484,6 +795,7 @@
|
||||||
self.packages.${system}.k8shost-server
|
self.packages.${system}.k8shost-server
|
||||||
self.packages.${system}.deployer-server
|
self.packages.${system}.deployer-server
|
||||||
self.packages.${system}.deployer-ctl
|
self.packages.${system}.deployer-ctl
|
||||||
|
self.packages.${system}.plasmacloud-reconciler
|
||||||
self.packages.${system}.nix-agent
|
self.packages.${system}.nix-agent
|
||||||
self.packages.${system}.node-agent
|
self.packages.${system}.node-agent
|
||||||
self.packages.${system}.fleet-scheduler
|
self.packages.${system}.fleet-scheduler
|
||||||
|
|
@ -556,6 +868,10 @@
|
||||||
drv = self.packages.${system}.deployer-ctl;
|
drv = self.packages.${system}.deployer-ctl;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
plasmacloud-reconciler = flake-utils.lib.mkApp {
|
||||||
|
drv = self.packages.${system}.plasmacloud-reconciler;
|
||||||
|
};
|
||||||
|
|
||||||
nix-agent = flake-utils.lib.mkApp {
|
nix-agent = flake-utils.lib.mkApp {
|
||||||
drv = self.packages.${system}.nix-agent;
|
drv = self.packages.${system}.nix-agent;
|
||||||
};
|
};
|
||||||
|
|
@ -568,6 +884,144 @@
|
||||||
drv = self.packages.${system}.fleet-scheduler;
|
drv = self.packages.${system}.fleet-scheduler;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
checks = {
|
||||||
|
deployer-vm-smoke = pkgs.testers.runNixOSTest (
|
||||||
|
import ./nix/tests/deployer-vm-smoke.nix {
|
||||||
|
inherit pkgs;
|
||||||
|
photoncloudPackages = self.packages.${system};
|
||||||
|
smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
deployer-vm-rollback = pkgs.testers.runNixOSTest (
|
||||||
|
import ./nix/tests/deployer-vm-smoke.nix {
|
||||||
|
inherit pkgs;
|
||||||
|
photoncloudPackages = self.packages.${system};
|
||||||
|
smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel;
|
||||||
|
desiredSystemOverrides = {
|
||||||
|
health_check_command = [ "false" ];
|
||||||
|
rollback_on_failure = true;
|
||||||
|
};
|
||||||
|
expectedStatus = "rolled-back";
|
||||||
|
expectCurrentSystemMatchesTarget = false;
|
||||||
|
expectMarkerPresent = false;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" {
|
||||||
|
nativeBuildInputs = with pkgs; [
|
||||||
|
bash
|
||||||
|
coreutils
|
||||||
|
curl
|
||||||
|
findutils
|
||||||
|
gawk
|
||||||
|
gnugrep
|
||||||
|
gnused
|
||||||
|
procps
|
||||||
|
python3
|
||||||
|
];
|
||||||
|
PHOTONCLOUD_E2E_IN_NIX = "1";
|
||||||
|
PHOTONCLOUD_CHAINFIRE_SERVER_BIN =
|
||||||
|
"${self.packages.${system}.chainfire-server}/bin/chainfire";
|
||||||
|
PHOTONCLOUD_DEPLOYER_SERVER_BIN =
|
||||||
|
"${self.packages.${system}.deployer-server}/bin/deployer-server";
|
||||||
|
PHOTONCLOUD_DEPLOYER_CTL_BIN =
|
||||||
|
"${self.packages.${system}.deployer-ctl}/bin/deployer-ctl";
|
||||||
|
} ''
|
||||||
|
export HOME="$TMPDIR/home"
|
||||||
|
mkdir -p "$HOME"
|
||||||
|
export PATH="${pkgs.lib.makeBinPath [
|
||||||
|
pkgs.bash
|
||||||
|
pkgs.coreutils
|
||||||
|
pkgs.curl
|
||||||
|
pkgs.findutils
|
||||||
|
pkgs.gawk
|
||||||
|
pkgs.gnugrep
|
||||||
|
pkgs.gnused
|
||||||
|
pkgs.procps
|
||||||
|
pkgs.python3
|
||||||
|
]}"
|
||||||
|
bash ${./deployer/scripts/verify-deployer-bootstrap-e2e.sh}
|
||||||
|
touch "$out"
|
||||||
|
'';
|
||||||
|
|
||||||
|
host-lifecycle-e2e = pkgs.runCommand "host-lifecycle-e2e" {
|
||||||
|
nativeBuildInputs = with pkgs; [
|
||||||
|
bash
|
||||||
|
coreutils
|
||||||
|
curl
|
||||||
|
findutils
|
||||||
|
gawk
|
||||||
|
gnugrep
|
||||||
|
gnused
|
||||||
|
procps
|
||||||
|
python3
|
||||||
|
];
|
||||||
|
PHOTONCLOUD_E2E_IN_NIX = "1";
|
||||||
|
PHOTONCLOUD_CHAINFIRE_SERVER_BIN =
|
||||||
|
"${self.packages.${system}.chainfire-server}/bin/chainfire";
|
||||||
|
PHOTONCLOUD_DEPLOYER_CTL_BIN =
|
||||||
|
"${self.packages.${system}.deployer-ctl}/bin/deployer-ctl";
|
||||||
|
PHOTONCLOUD_PLASMACLOUD_RECONCILER_BIN =
|
||||||
|
"${self.packages.${system}.plasmacloud-reconciler}/bin/plasmacloud-reconciler";
|
||||||
|
} ''
|
||||||
|
export HOME="$TMPDIR/home"
|
||||||
|
mkdir -p "$HOME"
|
||||||
|
export PATH="${pkgs.lib.makeBinPath [
|
||||||
|
pkgs.bash
|
||||||
|
pkgs.coreutils
|
||||||
|
pkgs.curl
|
||||||
|
pkgs.findutils
|
||||||
|
pkgs.gawk
|
||||||
|
pkgs.gnugrep
|
||||||
|
pkgs.gnused
|
||||||
|
pkgs.procps
|
||||||
|
pkgs.python3
|
||||||
|
]}"
|
||||||
|
bash ${./deployer/scripts/verify-host-lifecycle-e2e.sh}
|
||||||
|
touch "$out"
|
||||||
|
'';
|
||||||
|
|
||||||
|
fleet-scheduler-e2e = pkgs.runCommand "fleet-scheduler-e2e" {
|
||||||
|
nativeBuildInputs = with pkgs; [
|
||||||
|
bash
|
||||||
|
coreutils
|
||||||
|
curl
|
||||||
|
findutils
|
||||||
|
gawk
|
||||||
|
gnugrep
|
||||||
|
gnused
|
||||||
|
procps
|
||||||
|
python3
|
||||||
|
];
|
||||||
|
PHOTONCLOUD_E2E_IN_NIX = "1";
|
||||||
|
PHOTONCLOUD_CHAINFIRE_SERVER_BIN =
|
||||||
|
"${self.packages.${system}.chainfire-server}/bin/chainfire";
|
||||||
|
PHOTONCLOUD_DEPLOYER_CTL_BIN =
|
||||||
|
"${self.packages.${system}.deployer-ctl}/bin/deployer-ctl";
|
||||||
|
PHOTONCLOUD_NODE_AGENT_BIN =
|
||||||
|
"${self.packages.${system}.node-agent}/bin/node-agent";
|
||||||
|
PHOTONCLOUD_FLEET_SCHEDULER_BIN =
|
||||||
|
"${self.packages.${system}.fleet-scheduler}/bin/fleet-scheduler";
|
||||||
|
} ''
|
||||||
|
export HOME="$TMPDIR/home"
|
||||||
|
mkdir -p "$HOME"
|
||||||
|
export PATH="${pkgs.lib.makeBinPath [
|
||||||
|
pkgs.bash
|
||||||
|
pkgs.coreutils
|
||||||
|
pkgs.curl
|
||||||
|
pkgs.findutils
|
||||||
|
pkgs.gawk
|
||||||
|
pkgs.gnugrep
|
||||||
|
pkgs.gnused
|
||||||
|
pkgs.procps
|
||||||
|
pkgs.python3
|
||||||
|
]}"
|
||||||
|
bash ${./deployer/scripts/verify-fleet-scheduler-e2e.sh}
|
||||||
|
touch "$out"
|
||||||
|
'';
|
||||||
|
};
|
||||||
}
|
}
|
||||||
) // {
|
) // {
|
||||||
# ========================================================================
|
# ========================================================================
|
||||||
|
|
@ -606,6 +1060,12 @@
|
||||||
modules = [ ./nix/images/netboot-base.nix ];
|
modules = [ ./nix/images/netboot-base.nix ];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Offline-friendly target used by deployer VM smoke tests.
|
||||||
|
vm-smoke-target = nixpkgs.lib.nixosSystem {
|
||||||
|
system = "x86_64-linux";
|
||||||
|
modules = [ ./nix/images/deployer-vm-smoke-target.nix ];
|
||||||
|
};
|
||||||
|
|
||||||
# PlasmaCloud ISO (T061.S5 - bootable ISO with cluster-config embedding)
|
# PlasmaCloud ISO (T061.S5 - bootable ISO with cluster-config embedding)
|
||||||
plasmacloud-iso = nixpkgs.lib.nixosSystem {
|
plasmacloud-iso = nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
|
|
@ -732,6 +1192,7 @@
|
||||||
k8shost-server = self.packages.${final.system}.k8shost-server;
|
k8shost-server = self.packages.${final.system}.k8shost-server;
|
||||||
deployer-server = self.packages.${final.system}.deployer-server;
|
deployer-server = self.packages.${final.system}.deployer-server;
|
||||||
deployer-ctl = self.packages.${final.system}.deployer-ctl;
|
deployer-ctl = self.packages.${final.system}.deployer-ctl;
|
||||||
|
plasmacloud-reconciler = self.packages.${final.system}.plasmacloud-reconciler;
|
||||||
plasmacloudFlakeBundle = self.packages.${final.system}.plasmacloudFlakeBundle;
|
plasmacloudFlakeBundle = self.packages.${final.system}.plasmacloudFlakeBundle;
|
||||||
nix-agent = self.packages.${final.system}.nix-agent;
|
nix-agent = self.packages.${final.system}.nix-agent;
|
||||||
node-agent = self.packages.${final.system}.node-agent;
|
node-agent = self.packages.${final.system}.node-agent;
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ use flaredb_proto::kvrpc::{
|
||||||
use flaredb_proto::pdpb::Store;
|
use flaredb_proto::pdpb::Store;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
use std::time::{Instant, SystemTime, UNIX_EPOCH};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use tokio::sync::Mutex;
|
use tokio::sync::Mutex;
|
||||||
use tonic::transport::Channel;
|
use tonic::transport::Channel;
|
||||||
|
|
@ -35,6 +35,7 @@ pub struct RdbClient {
|
||||||
chainfire_kv_client: Option<ChainfireKvClient<Channel>>,
|
chainfire_kv_client: Option<ChainfireKvClient<Channel>>,
|
||||||
|
|
||||||
region_cache: RegionCache,
|
region_cache: RegionCache,
|
||||||
|
chainfire_route_cache: Arc<Mutex<Option<ChainfireRouteSnapshot>>>,
|
||||||
namespace: String,
|
namespace: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -53,10 +54,18 @@ struct ChainfireRegionInfo {
|
||||||
leader_id: u64,
|
leader_id: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct ChainfireRouteSnapshot {
|
||||||
|
stores: HashMap<u64, ChainfireStoreInfo>,
|
||||||
|
regions: Vec<ChainfireRegionInfo>,
|
||||||
|
fetched_at: Instant,
|
||||||
|
}
|
||||||
|
|
||||||
impl RdbClient {
|
impl RdbClient {
|
||||||
const ROUTE_RETRY_LIMIT: usize = 12;
|
const ROUTE_RETRY_LIMIT: usize = 12;
|
||||||
const ROUTE_RETRY_BASE_DELAY_MS: u64 = 100;
|
const ROUTE_RETRY_BASE_DELAY_MS: u64 = 100;
|
||||||
const ROUTED_RPC_TIMEOUT: Duration = Duration::from_secs(1);
|
const ROUTED_RPC_TIMEOUT: Duration = Duration::from_secs(1);
|
||||||
|
const CHAINFIRE_ROUTE_CACHE_TTL: Duration = Duration::from_secs(2);
|
||||||
|
|
||||||
pub async fn connect_with_pd(
|
pub async fn connect_with_pd(
|
||||||
_server_addr: String,
|
_server_addr: String,
|
||||||
|
|
@ -70,36 +79,68 @@ impl RdbClient {
|
||||||
pd_addr: String,
|
pd_addr: String,
|
||||||
namespace: impl Into<String>,
|
namespace: impl Into<String>,
|
||||||
) -> Result<Self, tonic::transport::Error> {
|
) -> Result<Self, tonic::transport::Error> {
|
||||||
|
let pd_endpoints = parse_transport_endpoints(&pd_addr);
|
||||||
|
let normalized_server_addr = normalize_transport_addr(&server_addr);
|
||||||
// A number of in-repo callers still pass the same address for both server and PD.
|
// A number of in-repo callers still pass the same address for both server and PD.
|
||||||
// In that case, prefer direct routing and skip the PD lookup path entirely.
|
// In that case, prefer direct routing and skip the PD lookup path entirely.
|
||||||
let direct_addr = if !server_addr.is_empty() && server_addr == pd_addr {
|
let direct_addr = if !normalized_server_addr.is_empty()
|
||||||
Some(server_addr)
|
&& pd_endpoints
|
||||||
|
.iter()
|
||||||
|
.any(|endpoint| normalize_transport_addr(endpoint) == normalized_server_addr)
|
||||||
|
{
|
||||||
|
Some(normalized_server_addr.clone())
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
let (tso_client, pd_client, chainfire_kv_client) = if direct_addr.is_some() {
|
let (tso_client, pd_client, chainfire_kv_client) = if direct_addr.is_some() {
|
||||||
(None, None, None)
|
(None, None, None)
|
||||||
} else {
|
} else {
|
||||||
let pd_channel = Channel::from_shared(transport_endpoint(&pd_addr))
|
let mut last_error = None;
|
||||||
.unwrap()
|
let mut clients = None;
|
||||||
.connect()
|
for endpoint in &pd_endpoints {
|
||||||
.await?;
|
let pd_channel = match Channel::from_shared(transport_endpoint(endpoint)) {
|
||||||
let mut probe_client = PdClient::new(pd_channel.clone());
|
Ok(endpoint) => match endpoint.connect().await {
|
||||||
let probe = probe_client
|
Ok(channel) => channel,
|
||||||
.get_region(GetRegionRequest { key: Vec::new() })
|
Err(error) => {
|
||||||
.await;
|
last_error = Some(error);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let mut probe_client = PdClient::new(pd_channel.clone());
|
||||||
|
let probe = probe_client
|
||||||
|
.get_region(GetRegionRequest { key: Vec::new() })
|
||||||
|
.await;
|
||||||
|
|
||||||
match probe {
|
clients = Some(match probe {
|
||||||
Err(status) if status.code() == tonic::Code::Unimplemented => (
|
Err(status) if status.code() == tonic::Code::Unimplemented => (
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
Some(ChainfireKvClient::new(pd_channel)),
|
Some(ChainfireKvClient::new(pd_channel)),
|
||||||
),
|
),
|
||||||
_ => (
|
_ => (
|
||||||
Some(TsoClient::new(pd_channel.clone())),
|
Some(TsoClient::new(pd_channel.clone())),
|
||||||
Some(PdClient::new(pd_channel)),
|
Some(PdClient::new(pd_channel)),
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if let Some(clients) = clients {
|
||||||
|
clients
|
||||||
|
} else if let Some(error) = last_error {
|
||||||
|
return Err(error);
|
||||||
|
} else {
|
||||||
|
return Err(
|
||||||
|
Channel::from_shared("http://127.0.0.1:1".to_string())
|
||||||
|
.unwrap()
|
||||||
|
.connect()
|
||||||
|
.await
|
||||||
|
.expect_err("unreachable fallback endpoint should fail to connect"),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -111,6 +152,7 @@ impl RdbClient {
|
||||||
chainfire_kv_client,
|
chainfire_kv_client,
|
||||||
region_cache: RegionCache::new(),
|
region_cache: RegionCache::new(),
|
||||||
namespace: namespace.into(),
|
namespace: namespace.into(),
|
||||||
|
chainfire_route_cache: Arc::new(Mutex::new(None)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -119,17 +161,51 @@ impl RdbClient {
|
||||||
server_addr: String,
|
server_addr: String,
|
||||||
namespace: impl Into<String>,
|
namespace: impl Into<String>,
|
||||||
) -> Result<Self, tonic::transport::Error> {
|
) -> Result<Self, tonic::transport::Error> {
|
||||||
let ep = transport_endpoint(&server_addr);
|
let direct_endpoints = parse_transport_endpoints(&server_addr);
|
||||||
let channel = Channel::from_shared(ep).unwrap().connect().await?;
|
let mut last_error = None;
|
||||||
|
let mut selected_addr = None;
|
||||||
|
let mut channel = None;
|
||||||
|
|
||||||
|
for endpoint in &direct_endpoints {
|
||||||
|
match Channel::from_shared(transport_endpoint(endpoint)) {
|
||||||
|
Ok(endpoint_builder) => match endpoint_builder.connect().await {
|
||||||
|
Ok(connected) => {
|
||||||
|
selected_addr = Some(endpoint.clone());
|
||||||
|
channel = Some(connected);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
last_error = Some(error);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(_) => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let selected_addr = if let Some(addr) = selected_addr {
|
||||||
|
addr
|
||||||
|
} else if let Some(error) = last_error {
|
||||||
|
return Err(error);
|
||||||
|
} else {
|
||||||
|
return Err(
|
||||||
|
Channel::from_shared("http://127.0.0.1:1".to_string())
|
||||||
|
.unwrap()
|
||||||
|
.connect()
|
||||||
|
.await
|
||||||
|
.expect_err("unreachable fallback endpoint should fail to connect"),
|
||||||
|
);
|
||||||
|
};
|
||||||
|
let channel = channel.expect("direct connect should produce a channel when selected");
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
channels: Arc::new(Mutex::new(HashMap::new())),
|
channels: Arc::new(Mutex::new(HashMap::new())),
|
||||||
direct_addr: Some(server_addr),
|
direct_addr: Some(selected_addr),
|
||||||
tso_client: Some(TsoClient::new(channel.clone())),
|
tso_client: Some(TsoClient::new(channel.clone())),
|
||||||
pd_client: Some(PdClient::new(channel)),
|
pd_client: Some(PdClient::new(channel)),
|
||||||
chainfire_kv_client: None,
|
chainfire_kv_client: None,
|
||||||
region_cache: RegionCache::new(),
|
region_cache: RegionCache::new(),
|
||||||
namespace: namespace.into(),
|
namespace: namespace.into(),
|
||||||
|
chainfire_route_cache: Arc::new(Mutex::new(None)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -165,6 +241,7 @@ impl RdbClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
self.region_cache.clear().await;
|
self.region_cache.clear().await;
|
||||||
|
self.invalidate_chainfire_route_cache().await;
|
||||||
|
|
||||||
if let Some(chainfire_kv_client) = &self.chainfire_kv_client {
|
if let Some(chainfire_kv_client) = &self.chainfire_kv_client {
|
||||||
return self.resolve_addr_via_chainfire(key, chainfire_kv_client.clone()).await;
|
return self.resolve_addr_via_chainfire(key, chainfire_kv_client.clone()).await;
|
||||||
|
|
@ -183,10 +260,6 @@ impl RdbClient {
|
||||||
Err(tonic::Status::not_found("region not found"))
|
Err(tonic::Status::not_found("region not found"))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_channel(&self, addr: &str) -> Result<Channel, tonic::transport::Error> {
|
|
||||||
Self::get_channel_from_map(&self.channels, addr).await
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_channel_from_map(
|
async fn get_channel_from_map(
|
||||||
channels: &Arc<Mutex<HashMap<String, Channel>>>,
|
channels: &Arc<Mutex<HashMap<String, Channel>>>,
|
||||||
addr: &str,
|
addr: &str,
|
||||||
|
|
@ -207,6 +280,73 @@ impl RdbClient {
|
||||||
map.remove(addr);
|
map.remove(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn invalidate_chainfire_route_cache(&self) {
|
||||||
|
let mut cache = self.chainfire_route_cache.lock().await;
|
||||||
|
*cache = None;
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn chainfire_route_snapshot(
|
||||||
|
&self,
|
||||||
|
mut kv_client: ChainfireKvClient<Channel>,
|
||||||
|
force_refresh: bool,
|
||||||
|
) -> Result<ChainfireRouteSnapshot, tonic::Status> {
|
||||||
|
if !force_refresh {
|
||||||
|
if let Some(snapshot) = self.chainfire_route_cache.lock().await.clone() {
|
||||||
|
if snapshot.fetched_at.elapsed() <= Self::CHAINFIRE_ROUTE_CACHE_TTL {
|
||||||
|
return Ok(snapshot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let regions = list_chainfire_regions(&mut kv_client).await?;
|
||||||
|
let stores = list_chainfire_stores(&mut kv_client).await?;
|
||||||
|
let snapshot = ChainfireRouteSnapshot {
|
||||||
|
stores,
|
||||||
|
regions,
|
||||||
|
fetched_at: Instant::now(),
|
||||||
|
};
|
||||||
|
let mut cache = self.chainfire_route_cache.lock().await;
|
||||||
|
*cache = Some(snapshot.clone());
|
||||||
|
Ok(snapshot)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_addr_from_chainfire_snapshot(
|
||||||
|
&self,
|
||||||
|
key: &[u8],
|
||||||
|
snapshot: &ChainfireRouteSnapshot,
|
||||||
|
) -> Result<(Region, Store), tonic::Status> {
|
||||||
|
let region = snapshot
|
||||||
|
.regions
|
||||||
|
.iter()
|
||||||
|
.find(|region| {
|
||||||
|
let start_ok = region.start_key.is_empty() || key >= region.start_key.as_slice();
|
||||||
|
let end_ok = region.end_key.is_empty() || key < region.end_key.as_slice();
|
||||||
|
start_ok && end_ok
|
||||||
|
})
|
||||||
|
.cloned()
|
||||||
|
.ok_or_else(|| tonic::Status::not_found("region not found"))?;
|
||||||
|
|
||||||
|
let leader = snapshot
|
||||||
|
.stores
|
||||||
|
.get(®ion.leader_id)
|
||||||
|
.cloned()
|
||||||
|
.ok_or_else(|| tonic::Status::not_found("leader store not found"))?;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
Region {
|
||||||
|
id: region.id,
|
||||||
|
start_key: region.start_key,
|
||||||
|
end_key: region.end_key,
|
||||||
|
peers: region.peers,
|
||||||
|
leader_id: region.leader_id,
|
||||||
|
},
|
||||||
|
Store {
|
||||||
|
id: leader.id,
|
||||||
|
addr: leader.addr,
|
||||||
|
},
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
async fn with_routed_addr<T, F, Fut>(&self, key: &[u8], mut op: F) -> Result<T, tonic::Status>
|
async fn with_routed_addr<T, F, Fut>(&self, key: &[u8], mut op: F) -> Result<T, tonic::Status>
|
||||||
where
|
where
|
||||||
F: FnMut(String) -> Fut,
|
F: FnMut(String) -> Fut,
|
||||||
|
|
@ -590,41 +730,21 @@ impl RdbClient {
|
||||||
async fn resolve_addr_via_chainfire(
|
async fn resolve_addr_via_chainfire(
|
||||||
&self,
|
&self,
|
||||||
key: &[u8],
|
key: &[u8],
|
||||||
mut kv_client: ChainfireKvClient<Channel>,
|
kv_client: ChainfireKvClient<Channel>,
|
||||||
) -> Result<String, tonic::Status> {
|
) -> Result<String, tonic::Status> {
|
||||||
let regions = list_chainfire_regions(&mut kv_client).await?;
|
for force_refresh in [false, true] {
|
||||||
let stores = list_chainfire_stores(&mut kv_client).await?;
|
let snapshot = self
|
||||||
|
.chainfire_route_snapshot(kv_client.clone(), force_refresh)
|
||||||
|
.await?;
|
||||||
|
if let Ok((region, leader)) =
|
||||||
|
self.resolve_addr_from_chainfire_snapshot(key, &snapshot)
|
||||||
|
{
|
||||||
|
self.region_cache.update(region, leader.clone()).await;
|
||||||
|
return Ok(leader.addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let region = regions
|
Err(tonic::Status::not_found("region not found"))
|
||||||
.into_iter()
|
|
||||||
.find(|region| {
|
|
||||||
let start_ok = region.start_key.is_empty() || key >= region.start_key.as_slice();
|
|
||||||
let end_ok = region.end_key.is_empty() || key < region.end_key.as_slice();
|
|
||||||
start_ok && end_ok
|
|
||||||
})
|
|
||||||
.ok_or_else(|| tonic::Status::not_found("region not found"))?;
|
|
||||||
|
|
||||||
let leader = stores
|
|
||||||
.get(®ion.leader_id)
|
|
||||||
.ok_or_else(|| tonic::Status::not_found("leader store not found"))?;
|
|
||||||
|
|
||||||
self.region_cache
|
|
||||||
.update(
|
|
||||||
Region {
|
|
||||||
id: region.id,
|
|
||||||
start_key: region.start_key,
|
|
||||||
end_key: region.end_key,
|
|
||||||
peers: region.peers,
|
|
||||||
leader_id: region.leader_id,
|
|
||||||
},
|
|
||||||
Store {
|
|
||||||
id: leader.id,
|
|
||||||
addr: leader.addr.clone(),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
Ok(leader.addr.clone())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -636,6 +756,23 @@ fn transport_endpoint(addr: &str) -> String {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn normalize_transport_addr(addr: &str) -> String {
|
||||||
|
addr.trim()
|
||||||
|
.trim_start_matches("http://")
|
||||||
|
.trim_start_matches("https://")
|
||||||
|
.trim_end_matches('/')
|
||||||
|
.to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_transport_endpoints(addrs: &str) -> Vec<String> {
|
||||||
|
addrs
|
||||||
|
.split(',')
|
||||||
|
.map(str::trim)
|
||||||
|
.filter(|item| !item.is_empty())
|
||||||
|
.map(normalize_transport_addr)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
fn prefix_range_end(prefix: &str) -> Vec<u8> {
|
fn prefix_range_end(prefix: &str) -> Vec<u8> {
|
||||||
let mut end = prefix.as_bytes().to_vec();
|
let mut end = prefix.as_bytes().to_vec();
|
||||||
if let Some(last) = end.last_mut() {
|
if let Some(last) = end.last_mut() {
|
||||||
|
|
@ -696,7 +833,7 @@ async fn list_chainfire_regions(
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::RdbClient;
|
use super::{RdbClient, normalize_transport_addr, parse_transport_endpoints};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn unknown_transport_errors_are_treated_as_retryable_routes() {
|
fn unknown_transport_errors_are_treated_as_retryable_routes() {
|
||||||
|
|
@ -711,4 +848,20 @@ mod tests {
|
||||||
assert!(RdbClient::is_retryable_route_error(&status));
|
assert!(RdbClient::is_retryable_route_error(&status));
|
||||||
assert!(!RdbClient::is_transport_error(&status));
|
assert!(!RdbClient::is_transport_error(&status));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_transport_endpoints_accepts_comma_separated_values() {
|
||||||
|
assert_eq!(
|
||||||
|
parse_transport_endpoints("http://10.0.0.1:2379, 10.0.0.2:2379/"),
|
||||||
|
vec!["10.0.0.1:2379".to_string(), "10.0.0.2:2379".to_string()]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn normalize_transport_addr_strips_scheme_and_slashes() {
|
||||||
|
assert_eq!(
|
||||||
|
normalize_transport_addr("https://10.0.0.1:2479/"),
|
||||||
|
"10.0.0.1:2479".to_string()
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,9 @@ struct Args {
|
||||||
#[arg(long, default_value = "127.0.0.1:2479")]
|
#[arg(long, default_value = "127.0.0.1:2479")]
|
||||||
pd_addr: String,
|
pd_addr: String,
|
||||||
|
|
||||||
|
#[arg(long, default_value = "")]
|
||||||
|
namespace: String,
|
||||||
|
|
||||||
#[command(subcommand)]
|
#[command(subcommand)]
|
||||||
command: Commands,
|
command: Commands,
|
||||||
}
|
}
|
||||||
|
|
@ -44,7 +47,8 @@ enum Commands {
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
let mut client = RdbClient::connect_with_pd(args.addr, args.pd_addr).await?;
|
let mut client =
|
||||||
|
RdbClient::connect_with_pd_namespace(args.addr, args.pd_addr, args.namespace).await?;
|
||||||
|
|
||||||
match args.command {
|
match args.command {
|
||||||
Commands::RawPut { key, value } => {
|
Commands::RawPut { key, value } => {
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ impl Cluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn register_store(&self, addr: String) -> u64 {
|
pub fn register_store(&self, addr: String, requested_id: Option<u64>) -> u64 {
|
||||||
let mut state = self.inner.lock().unwrap();
|
let mut state = self.inner.lock().unwrap();
|
||||||
|
|
||||||
// Dedup check? For now, always new ID.
|
// Dedup check? For now, always new ID.
|
||||||
|
|
@ -39,8 +39,15 @@ impl Cluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let id = state.next_store_id;
|
let id = requested_id
|
||||||
state.next_store_id += 1;
|
.filter(|id| *id != 0 && !state.stores.contains_key(id))
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
while state.stores.contains_key(&state.next_store_id) {
|
||||||
|
state.next_store_id += 1;
|
||||||
|
}
|
||||||
|
state.next_store_id
|
||||||
|
});
|
||||||
|
state.next_store_id = state.next_store_id.max(id.saturating_add(1));
|
||||||
|
|
||||||
state.stores.insert(id, Store { id, addr });
|
state.stores.insert(id, Store { id, addr });
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,8 @@ impl Pd for PdServiceImpl {
|
||||||
request: Request<RegisterStoreRequest>,
|
request: Request<RegisterStoreRequest>,
|
||||||
) -> Result<Response<RegisterStoreResponse>, Status> {
|
) -> Result<Response<RegisterStoreResponse>, Status> {
|
||||||
let req = request.into_inner();
|
let req = request.into_inner();
|
||||||
let store_id = self.cluster.register_store(req.addr);
|
let requested_store_id = (req.store_id != 0).then_some(req.store_id);
|
||||||
|
let store_id = self.cluster.register_store(req.addr, requested_store_id);
|
||||||
Ok(Response::new(RegisterStoreResponse {
|
Ok(Response::new(RegisterStoreResponse {
|
||||||
store_id,
|
store_id,
|
||||||
cluster_id: 1, // fixed for now
|
cluster_id: 1, // fixed for now
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ service Pd {
|
||||||
|
|
||||||
message RegisterStoreRequest {
|
message RegisterStoreRequest {
|
||||||
string addr = 1; // e.g., "127.0.0.1:50051"
|
string addr = 1; // e.g., "127.0.0.1:50051"
|
||||||
|
uint64 store_id = 2; // Optional requested store ID (0 = auto-assign)
|
||||||
}
|
}
|
||||||
|
|
||||||
message RegisterStoreResponse {
|
message RegisterStoreResponse {
|
||||||
|
|
|
||||||
|
|
@ -1,23 +1,38 @@
|
||||||
use crate::store::Store;
|
use crate::store::Store;
|
||||||
use flaredb_proto::pdpb::pd_client::PdClient;
|
use flaredb_proto::pdpb::pd_client::PdClient;
|
||||||
use flaredb_proto::pdpb::ListRegionsRequest;
|
use flaredb_proto::pdpb::{ListRegionsRequest, RegisterStoreRequest};
|
||||||
use flaredb_types::RegionMeta;
|
use flaredb_types::RegionMeta;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::time::{sleep, Duration};
|
use tokio::time::{sleep, Duration};
|
||||||
|
|
||||||
/// Periodically send region/store heartbeat to PD.
|
/// Periodically send region/store heartbeat to PD.
|
||||||
pub async fn start_heartbeat(pd_addr: String, store: Arc<Store>) {
|
pub async fn start_heartbeat(
|
||||||
|
pd_addr: String,
|
||||||
|
store: Arc<Store>,
|
||||||
|
server_addr: String,
|
||||||
|
requested_store_id: u64,
|
||||||
|
) {
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
let endpoint = format!("http://{}", pd_addr);
|
let endpoint = format!("http://{}", pd_addr);
|
||||||
loop {
|
loop {
|
||||||
if let Ok(mut client) = PdClient::connect(endpoint.clone()).await {
|
if let Ok(mut client) = PdClient::connect(endpoint.clone()).await {
|
||||||
|
if let Err(err) = client
|
||||||
|
.register_store(RegisterStoreRequest {
|
||||||
|
addr: server_addr.clone(),
|
||||||
|
store_id: requested_store_id,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
tracing::warn!("failed to register store with legacy PD: {}", err);
|
||||||
|
}
|
||||||
|
|
||||||
// list regions to keep routing fresh
|
// list regions to keep routing fresh
|
||||||
if let Ok(resp) = client.list_regions(ListRegionsRequest {}).await {
|
if let Ok(resp) = client.list_regions(ListRegionsRequest {}).await {
|
||||||
let resp = resp.into_inner();
|
let resp = resp.into_inner();
|
||||||
let mut metas = Vec::new();
|
let mut metas = Vec::new();
|
||||||
for r in resp.regions {
|
for r in resp.regions {
|
||||||
let voters = if r.peers.is_empty() {
|
let voters = if r.peers.is_empty() {
|
||||||
Vec::new()
|
vec![store.store_id()]
|
||||||
} else {
|
} else {
|
||||||
r.peers.clone()
|
r.peers.clone()
|
||||||
};
|
};
|
||||||
|
|
@ -27,11 +42,7 @@ pub async fn start_heartbeat(pd_addr: String, store: Arc<Store>) {
|
||||||
start_key: r.start_key,
|
start_key: r.start_key,
|
||||||
end_key: r.end_key,
|
end_key: r.end_key,
|
||||||
},
|
},
|
||||||
if voters.is_empty() {
|
voters,
|
||||||
vec![store.store_id()]
|
|
||||||
} else {
|
|
||||||
voters
|
|
||||||
},
|
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
if !metas.is_empty() {
|
if !metas.is_empty() {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use flaredb_proto::kvrpc::kv_cas_server::KvCasServer;
|
use flaredb_proto::kvrpc::kv_cas_server::KvCasServer;
|
||||||
use flaredb_proto::kvrpc::kv_raw_server::KvRawServer;
|
use flaredb_proto::kvrpc::kv_raw_server::KvRawServer;
|
||||||
|
use flaredb_proto::pdpb::pd_client::PdClient as LegacyPdClient;
|
||||||
|
use flaredb_proto::pdpb::{ListRegionsRequest, RegisterStoreRequest};
|
||||||
use flaredb_proto::raft_server::raft_service_server::RaftServiceServer;
|
use flaredb_proto::raft_server::raft_service_server::RaftServiceServer;
|
||||||
use flaredb_proto::sqlrpc::sql_service_server::SqlServiceServer;
|
use flaredb_proto::sqlrpc::sql_service_server::SqlServiceServer;
|
||||||
use flaredb_server::config::{self, Config, NamespaceManager};
|
use flaredb_server::config::{self, Config, NamespaceManager};
|
||||||
|
|
@ -12,7 +14,7 @@ use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::Mutex;
|
use tokio::sync::Mutex;
|
||||||
use tokio::time::{sleep, Duration};
|
use tokio::time::{sleep, Duration};
|
||||||
use tonic::transport::{Certificate, Identity, Server, ServerTlsConfig};
|
use tonic::transport::{Certificate, Channel, Identity, Server, ServerTlsConfig};
|
||||||
use tonic_health::server::health_reporter;
|
use tonic_health::server::health_reporter;
|
||||||
use tracing::{info, warn}; // Import warn
|
use tracing::{info, warn}; // Import warn
|
||||||
use tracing_subscriber::EnvFilter;
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
@ -27,7 +29,7 @@ mod service;
|
||||||
mod sql_service;
|
mod sql_service;
|
||||||
mod store;
|
mod store;
|
||||||
|
|
||||||
use pd_client::{PdClient, PdEvent};
|
use pd_client::{PdClient as ChainfirePdClient, PdEvent};
|
||||||
|
|
||||||
const RAFT_GRPC_MESSAGE_SIZE: usize = 64 * 1024 * 1024;
|
const RAFT_GRPC_MESSAGE_SIZE: usize = 64 * 1024 * 1024;
|
||||||
|
|
||||||
|
|
@ -35,14 +37,18 @@ async fn connect_pd_with_retry(
|
||||||
pd_endpoints: &[String],
|
pd_endpoints: &[String],
|
||||||
attempts: u32,
|
attempts: u32,
|
||||||
delay: Duration,
|
delay: Duration,
|
||||||
) -> Option<PdClient> {
|
) -> Option<ChainfirePdClient> {
|
||||||
let mut last_error = None;
|
let mut last_error = None;
|
||||||
|
|
||||||
for attempt in 1..=attempts {
|
for attempt in 1..=attempts {
|
||||||
match PdClient::connect_any(pd_endpoints).await {
|
match ChainfirePdClient::connect_any(pd_endpoints).await {
|
||||||
Ok(client) => return Some(client),
|
Ok(client) => return Some(client),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
last_error = Some(err.to_string());
|
last_error = Some(err.to_string());
|
||||||
|
let protocol_mismatch = last_error
|
||||||
|
.as_deref()
|
||||||
|
.map(|msg| msg.contains("Unimplemented"))
|
||||||
|
.unwrap_or(false);
|
||||||
warn!(
|
warn!(
|
||||||
attempt,
|
attempt,
|
||||||
attempts,
|
attempts,
|
||||||
|
|
@ -50,6 +56,13 @@ async fn connect_pd_with_retry(
|
||||||
error = last_error.as_deref().unwrap_or("unknown"),
|
error = last_error.as_deref().unwrap_or("unknown"),
|
||||||
"Failed to connect to FlareDB PD"
|
"Failed to connect to FlareDB PD"
|
||||||
);
|
);
|
||||||
|
if protocol_mismatch {
|
||||||
|
warn!(
|
||||||
|
?pd_endpoints,
|
||||||
|
"PD endpoint does not speak ChainFire; falling back to legacy PD"
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
if attempt < attempts {
|
if attempt < attempts {
|
||||||
sleep(delay).await;
|
sleep(delay).await;
|
||||||
}
|
}
|
||||||
|
|
@ -65,6 +78,49 @@ async fn connect_pd_with_retry(
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn connect_legacy_pd_with_retry(
|
||||||
|
pd_endpoints: &[String],
|
||||||
|
attempts: u32,
|
||||||
|
delay: Duration,
|
||||||
|
) -> Option<(String, LegacyPdClient<Channel>)> {
|
||||||
|
let mut last_error = None;
|
||||||
|
|
||||||
|
for attempt in 1..=attempts {
|
||||||
|
for endpoint in pd_endpoints {
|
||||||
|
let transport = if endpoint.starts_with("http") {
|
||||||
|
endpoint.clone()
|
||||||
|
} else {
|
||||||
|
format!("http://{}", endpoint)
|
||||||
|
};
|
||||||
|
match LegacyPdClient::connect(transport.clone()).await {
|
||||||
|
Ok(client) => return Some((endpoint.clone(), client)),
|
||||||
|
Err(err) => {
|
||||||
|
last_error = Some(format!("{}: {}", endpoint, err));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
warn!(
|
||||||
|
attempt,
|
||||||
|
attempts,
|
||||||
|
?pd_endpoints,
|
||||||
|
error = last_error.as_deref().unwrap_or("unknown"),
|
||||||
|
"Failed to connect to legacy FlareDB PD"
|
||||||
|
);
|
||||||
|
|
||||||
|
if attempt < attempts {
|
||||||
|
sleep(delay).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
warn!(
|
||||||
|
?pd_endpoints,
|
||||||
|
error = last_error.as_deref().unwrap_or("unknown"),
|
||||||
|
"Exhausted legacy FlareDB PD connection retries"
|
||||||
|
);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(author, version, about, long_about = None)]
|
#[command(author, version, about, long_about = None)]
|
||||||
struct Args {
|
struct Args {
|
||||||
|
|
@ -334,7 +390,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let server_addr_string = server_config.addr.to_string();
|
let server_addr_string = server_config.addr.to_string();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
let client = Arc::new(Mutex::new(
|
let client = Arc::new(Mutex::new(
|
||||||
PdClient::connect_any(&pd_endpoints_for_task).await.ok(),
|
ChainfirePdClient::connect_any(&pd_endpoints_for_task)
|
||||||
|
.await
|
||||||
|
.ok(),
|
||||||
));
|
));
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
|
@ -396,7 +454,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Try to reconnect
|
// Try to reconnect
|
||||||
if let Ok(new_client) = PdClient::connect_any(&pd_endpoints_for_task).await
|
if let Ok(new_client) =
|
||||||
|
ChainfirePdClient::connect_any(&pd_endpoints_for_task).await
|
||||||
{
|
{
|
||||||
info!("Reconnected to PD");
|
info!("Reconnected to PD");
|
||||||
*guard = Some(new_client);
|
*guard = Some(new_client);
|
||||||
|
|
@ -406,6 +465,75 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
sleep(Duration::from_secs(10)).await;
|
sleep(Duration::from_secs(10)).await;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
} else if let Some((legacy_pd_addr, mut legacy_pd_client)) =
|
||||||
|
connect_legacy_pd_with_retry(&pd_endpoints, 3, Duration::from_secs(1)).await
|
||||||
|
{
|
||||||
|
info!(pd_addr = %legacy_pd_addr, "Connected to legacy FlareDB PD");
|
||||||
|
|
||||||
|
match legacy_pd_client
|
||||||
|
.register_store(RegisterStoreRequest {
|
||||||
|
addr: server_config.addr.to_string(),
|
||||||
|
store_id: server_config.store_id,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(resp) => {
|
||||||
|
let resp = resp.into_inner();
|
||||||
|
if resp.store_id != 0 && resp.store_id != server_config.store_id {
|
||||||
|
warn!(
|
||||||
|
expected_store_id = server_config.store_id,
|
||||||
|
assigned_store_id = resp.store_id,
|
||||||
|
"legacy PD assigned a different store id than local config"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => warn!("failed to register with legacy PD: {}", err),
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut region_metas = Vec::new();
|
||||||
|
match legacy_pd_client.list_regions(ListRegionsRequest {}).await {
|
||||||
|
Ok(resp) => {
|
||||||
|
for region in resp.into_inner().regions {
|
||||||
|
let voters = if region.peers.is_empty() || region.peers.len() < voters.len() {
|
||||||
|
voters.clone()
|
||||||
|
} else {
|
||||||
|
region.peers.clone()
|
||||||
|
};
|
||||||
|
region_metas.push((
|
||||||
|
RegionMeta {
|
||||||
|
id: region.id,
|
||||||
|
start_key: region.start_key,
|
||||||
|
end_key: region.end_key,
|
||||||
|
},
|
||||||
|
voters,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => warn!("failed to list regions from legacy PD: {}", err),
|
||||||
|
}
|
||||||
|
|
||||||
|
if region_metas.is_empty() {
|
||||||
|
region_metas.push((
|
||||||
|
RegionMeta {
|
||||||
|
id: 1,
|
||||||
|
start_key: Vec::new(),
|
||||||
|
end_key: Vec::new(),
|
||||||
|
},
|
||||||
|
voters.clone(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Err(e) = store.bootstrap_regions(region_metas).await {
|
||||||
|
warn!("failed to bootstrap regions from legacy PD: {}", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
heartbeat::start_heartbeat(
|
||||||
|
legacy_pd_addr,
|
||||||
|
store.clone(),
|
||||||
|
server_config.addr.to_string(),
|
||||||
|
server_config.store_id,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
} else {
|
} else {
|
||||||
info!("Starting in standalone mode with default region...");
|
info!("Starting in standalone mode with default region...");
|
||||||
let _ = store
|
let _ = store
|
||||||
|
|
@ -494,6 +622,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
server_addr: server_config.addr.to_string(),
|
server_addr: server_config.addr.to_string(),
|
||||||
pd_endpoints: pd_endpoints.clone(),
|
pd_endpoints: pd_endpoints.clone(),
|
||||||
store_id: server_config.store_id,
|
store_id: server_config.store_id,
|
||||||
|
configured_peers: (*peer_addrs).clone(),
|
||||||
};
|
};
|
||||||
let rest_app = rest::build_router(rest_state);
|
let rest_app = rest::build_router(rest_state);
|
||||||
let http_listener = tokio::net::TcpListener::bind(&http_addr).await?;
|
let http_listener = tokio::net::TcpListener::bind(&http_addr).await?;
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,8 @@ use axum::{
|
||||||
};
|
};
|
||||||
use crate::pd_client::PdClient;
|
use crate::pd_client::PdClient;
|
||||||
use flaredb_client::RdbClient;
|
use flaredb_client::RdbClient;
|
||||||
use flaredb_sql::executor::{ExecutionResult, SqlExecutor};
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
/// REST API state
|
/// REST API state
|
||||||
|
|
@ -26,6 +26,7 @@ pub struct RestApiState {
|
||||||
pub server_addr: String,
|
pub server_addr: String,
|
||||||
pub pd_endpoints: Vec<String>,
|
pub pd_endpoints: Vec<String>,
|
||||||
pub store_id: u64,
|
pub store_id: u64,
|
||||||
|
pub configured_peers: HashMap<u64, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Standard REST error response
|
/// Standard REST error response
|
||||||
|
|
@ -136,6 +137,15 @@ pub struct AddPeerRequest {
|
||||||
pub peer_id: u64,
|
pub peer_id: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Legacy/admin add member request for first-boot compatibility.
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct AddMemberRequestLegacy {
|
||||||
|
pub id: String,
|
||||||
|
pub raft_addr: String,
|
||||||
|
#[serde(default)]
|
||||||
|
pub addr: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
/// Region info response
|
/// Region info response
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
pub struct RegionResponse {
|
pub struct RegionResponse {
|
||||||
|
|
@ -153,6 +163,7 @@ pub fn build_router(state: RestApiState) -> Router {
|
||||||
.route("/api/v1/scan", get(scan_kv))
|
.route("/api/v1/scan", get(scan_kv))
|
||||||
.route("/api/v1/regions/{id}", get(get_region))
|
.route("/api/v1/regions/{id}", get(get_region))
|
||||||
.route("/api/v1/regions/{id}/add_peer", post(add_peer_to_region))
|
.route("/api/v1/regions/{id}/add_peer", post(add_peer_to_region))
|
||||||
|
.route("/admin/member/add", post(add_member_legacy))
|
||||||
.route("/health", get(health_check))
|
.route("/health", get(health_check))
|
||||||
.with_state(state)
|
.with_state(state)
|
||||||
}
|
}
|
||||||
|
|
@ -320,6 +331,121 @@ async fn add_peer_to_region(
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// POST /admin/member/add - first-boot compatible cluster join hook.
|
||||||
|
async fn add_member_legacy(
|
||||||
|
State(state): State<RestApiState>,
|
||||||
|
Json(req): Json<AddMemberRequestLegacy>,
|
||||||
|
) -> Result<(StatusCode, Json<SuccessResponse<serde_json::Value>>), (StatusCode, Json<ErrorResponse>)> {
|
||||||
|
let (peer_id, peer_addr) = resolve_join_peer(&state, &req).ok_or_else(|| {
|
||||||
|
error_response(
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"INVALID_MEMBER",
|
||||||
|
"Unable to resolve FlareDB peer id/address from join request",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut pd_client = PdClient::connect_any(&state.pd_endpoints)
|
||||||
|
.await
|
||||||
|
.map_err(|e| error_response(StatusCode::SERVICE_UNAVAILABLE, "PD_UNAVAILABLE", &format!("Failed to connect to PD: {}", e)))?;
|
||||||
|
|
||||||
|
let stores = pd_client.list_stores().await;
|
||||||
|
let already_registered = stores.iter().any(|store| store.id == peer_id);
|
||||||
|
|
||||||
|
pd_client
|
||||||
|
.register_store(peer_id, peer_addr.clone())
|
||||||
|
.await
|
||||||
|
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
||||||
|
|
||||||
|
let mut regions = pd_client.list_regions().await;
|
||||||
|
if regions.is_empty() {
|
||||||
|
pd_client
|
||||||
|
.init_default_region(vec![state.store_id, peer_id])
|
||||||
|
.await
|
||||||
|
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
||||||
|
regions = vec![crate::pd_client::RegionInfo {
|
||||||
|
id: 1,
|
||||||
|
start_key: Vec::new(),
|
||||||
|
end_key: Vec::new(),
|
||||||
|
peers: vec![state.store_id, peer_id],
|
||||||
|
leader_id: 0,
|
||||||
|
}];
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut updated_regions = Vec::new();
|
||||||
|
for mut region in regions {
|
||||||
|
if !region.peers.contains(&peer_id) {
|
||||||
|
region.peers.push(peer_id);
|
||||||
|
region.peers.sort_unstable();
|
||||||
|
pd_client
|
||||||
|
.put_region(region.clone())
|
||||||
|
.await
|
||||||
|
.map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?;
|
||||||
|
updated_regions.push(region.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let status = if already_registered && updated_regions.is_empty() {
|
||||||
|
StatusCode::CONFLICT
|
||||||
|
} else if already_registered {
|
||||||
|
StatusCode::OK
|
||||||
|
} else {
|
||||||
|
StatusCode::CREATED
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
status,
|
||||||
|
Json(SuccessResponse::new(serde_json::json!({
|
||||||
|
"peer_id": peer_id,
|
||||||
|
"addr": peer_addr,
|
||||||
|
"updated_regions": updated_regions,
|
||||||
|
"already_registered": already_registered,
|
||||||
|
}))),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_join_peer(
|
||||||
|
state: &RestApiState,
|
||||||
|
req: &AddMemberRequestLegacy,
|
||||||
|
) -> Option<(u64, String)> {
|
||||||
|
if let Ok(peer_id) = req.id.parse::<u64>() {
|
||||||
|
if let Some(addr) = req
|
||||||
|
.addr
|
||||||
|
.clone()
|
||||||
|
.or_else(|| state.configured_peers.get(&peer_id).cloned())
|
||||||
|
{
|
||||||
|
return Some((peer_id, addr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let candidate_host = socket_host(req.addr.as_deref().unwrap_or(&req.raft_addr));
|
||||||
|
state
|
||||||
|
.configured_peers
|
||||||
|
.iter()
|
||||||
|
.find(|(_, addr)| socket_host(addr) == candidate_host)
|
||||||
|
.map(|(peer_id, addr)| (*peer_id, addr.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn socket_host(addr: &str) -> String {
|
||||||
|
let normalized = addr
|
||||||
|
.trim()
|
||||||
|
.trim_start_matches("http://")
|
||||||
|
.trim_start_matches("https://")
|
||||||
|
.split('/')
|
||||||
|
.next()
|
||||||
|
.unwrap_or(addr)
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
normalized
|
||||||
|
.parse::<std::net::SocketAddr>()
|
||||||
|
.map(|socket_addr| socket_addr.ip().to_string())
|
||||||
|
.unwrap_or_else(|_| {
|
||||||
|
normalized
|
||||||
|
.rsplit_once(':')
|
||||||
|
.map(|(host, _)| host.trim_matches(['[', ']']).to_string())
|
||||||
|
.unwrap_or(normalized)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Helper to create error response
|
/// Helper to create error response
|
||||||
fn error_response(
|
fn error_response(
|
||||||
status: StatusCode,
|
status: StatusCode,
|
||||||
|
|
@ -338,3 +464,51 @@ fn error_response(
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn test_state() -> RestApiState {
|
||||||
|
RestApiState {
|
||||||
|
server_addr: "127.0.0.1:50052".to_string(),
|
||||||
|
pd_endpoints: vec!["127.0.0.1:2479".to_string()],
|
||||||
|
store_id: 1,
|
||||||
|
configured_peers: HashMap::from([
|
||||||
|
(1, "10.100.0.11:50052".to_string()),
|
||||||
|
(2, "10.100.0.12:50052".to_string()),
|
||||||
|
(3, "10.100.0.13:50052".to_string()),
|
||||||
|
]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resolve_join_peer_uses_numeric_id_when_available() {
|
||||||
|
let state = test_state();
|
||||||
|
let req = AddMemberRequestLegacy {
|
||||||
|
id: "2".to_string(),
|
||||||
|
raft_addr: "10.100.0.12:2380".to_string(),
|
||||||
|
addr: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
resolve_join_peer(&state, &req),
|
||||||
|
Some((2, "10.100.0.12:50052".to_string()))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resolve_join_peer_matches_host_from_raft_addr() {
|
||||||
|
let state = test_state();
|
||||||
|
let req = AddMemberRequestLegacy {
|
||||||
|
id: "node02".to_string(),
|
||||||
|
raft_addr: "10.100.0.12:2380".to_string(),
|
||||||
|
addr: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
resolve_join_peer(&state, &req),
|
||||||
|
Some((2, "10.100.0.12:50052".to_string()))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@
|
||||||
};
|
};
|
||||||
|
|
||||||
rustToolchain = pkgs.rust-bin.stable.latest.default.override {
|
rustToolchain = pkgs.rust-bin.stable.latest.default.override {
|
||||||
extensions = [ "rust-src" "rust-analyzer" ];
|
extensions = [ "rust-src" "rust-analyzer" "rustfmt" ];
|
||||||
};
|
};
|
||||||
|
|
||||||
in
|
in
|
||||||
|
|
|
||||||
|
|
@ -6,13 +6,43 @@ if [[ -z "${IN_NIX_SHELL:-}" ]] && command -v nix >/dev/null 2>&1; then
|
||||||
exec nix develop -c "$0" "$@"
|
exec nix develop -c "$0" "$@"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
WORKDIR=$(mktemp -d)
|
||||||
|
PD_LOG="${WORKDIR}/flaredb-pd.log"
|
||||||
|
SERVER_LOG="${WORKDIR}/flaredb-server.log"
|
||||||
|
DATA_DIR="${WORKDIR}/data"
|
||||||
|
|
||||||
|
run_client() {
|
||||||
|
local output=""
|
||||||
|
local status=0
|
||||||
|
local attempt=0
|
||||||
|
while (( attempt < 20 )); do
|
||||||
|
if output=$(cargo run --quiet --bin flaredb-client -- --pd-addr 127.0.0.1:2479 --addr 127.0.0.1:50052 "$@" 2>&1); then
|
||||||
|
printf '%s\n' "${output}" | awk 'NF { last = $0 } END { print last }'
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
status=$?
|
||||||
|
attempt=$((attempt + 1))
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
printf '%s\n' "${output}" >&2
|
||||||
|
return "${status}"
|
||||||
|
}
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
|
local exit_code=$?
|
||||||
if [[ -n "${SERVER_PID:-}" ]]; then
|
if [[ -n "${SERVER_PID:-}" ]]; then
|
||||||
kill "$SERVER_PID" >/dev/null 2>&1 || true
|
kill "$SERVER_PID" >/dev/null 2>&1 || true
|
||||||
fi
|
fi
|
||||||
if [[ -n "${PD_PID:-}" ]]; then
|
if [[ -n "${PD_PID:-}" ]]; then
|
||||||
kill "$PD_PID" >/dev/null 2>&1 || true
|
kill "$PD_PID" >/dev/null 2>&1 || true
|
||||||
fi
|
fi
|
||||||
|
if (( exit_code != 0 )); then
|
||||||
|
echo "verify-core failed; logs preserved at ${WORKDIR}" >&2
|
||||||
|
[[ -f "${PD_LOG}" ]] && { echo "--- ${PD_LOG} ---" >&2; tail -n 200 "${PD_LOG}" >&2; }
|
||||||
|
[[ -f "${SERVER_LOG}" ]] && { echo "--- ${SERVER_LOG} ---" >&2; tail -n 200 "${SERVER_LOG}" >&2; }
|
||||||
|
return "${exit_code}"
|
||||||
|
fi
|
||||||
|
rm -rf "${WORKDIR}"
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
|
@ -23,30 +53,38 @@ echo "Running tests..."
|
||||||
cargo test
|
cargo test
|
||||||
|
|
||||||
echo "Starting PD..."
|
echo "Starting PD..."
|
||||||
cargo run --bin rdb-pd -- --addr 127.0.0.1:2479 >/tmp/rdb-pd.log 2>&1 &
|
cargo run --bin flaredb-pd -- --addr 127.0.0.1:2479 >"${PD_LOG}" 2>&1 &
|
||||||
PD_PID=$!
|
PD_PID=$!
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
echo "Starting Server..."
|
echo "Starting Server..."
|
||||||
cargo run --bin rdb-server -- --pd-addr 127.0.0.1:2479 --addr 127.0.0.1:50052 --data-dir /tmp/rdb-server >/tmp/rdb-server.log 2>&1 &
|
cargo run --bin flaredb-server -- \
|
||||||
|
--pd-addr 127.0.0.1:2479 \
|
||||||
|
--addr 127.0.0.1:50052 \
|
||||||
|
--data-dir "${DATA_DIR}" \
|
||||||
|
--namespace-mode raw=eventual \
|
||||||
|
--namespace-mode cas=strong \
|
||||||
|
>"${SERVER_LOG}" 2>&1 &
|
||||||
SERVER_PID=$!
|
SERVER_PID=$!
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
echo "Running Client Verification..."
|
echo "Running Client Verification..."
|
||||||
|
|
||||||
echo "Testing TSO..."
|
echo "Testing TSO..."
|
||||||
cargo run --bin rdb-client -- --pd-addr 127.0.0.1:2479 --addr 127.0.0.1:50052 tso
|
TSO_OUTPUT=$(run_client tso)
|
||||||
|
[[ "${TSO_OUTPUT}" == Timestamp:* ]]
|
||||||
|
|
||||||
echo "Testing Raw Put/Get..."
|
echo "Testing Raw Put/Get..."
|
||||||
cargo run --bin rdb-client -- --pd-addr 127.0.0.1:2479 --addr 127.0.0.1:50052 raw-put --key foo --value bar
|
run_client --namespace raw raw-put --key foo --value bar >/dev/null
|
||||||
cargo run --bin rdb-client -- --pd-addr 127.0.0.1:2479 --addr 127.0.0.1:50052 raw-get --key foo
|
RAW_VALUE=$(run_client --namespace raw raw-get --key foo)
|
||||||
|
[[ "${RAW_VALUE}" == "bar" ]]
|
||||||
|
|
||||||
echo "Testing CAS success..."
|
echo "Testing CAS success..."
|
||||||
cargo run --bin rdb-client -- --pd-addr 127.0.0.1:2479 --addr 127.0.0.1:50052 cas --key cas1 --value v1 --expected 0
|
CAS_SUCCESS=$(run_client --namespace cas cas --key cas1 --value v1 --expected 0)
|
||||||
|
[[ "${CAS_SUCCESS}" == Success,* ]]
|
||||||
|
|
||||||
echo "Testing CAS conflict..."
|
echo "Testing CAS conflict..."
|
||||||
set +e
|
CAS_CONFLICT=$(run_client --namespace cas cas --key cas1 --value v2 --expected 0)
|
||||||
cargo run --bin rdb-client -- --pd-addr 127.0.0.1:2479 --addr 127.0.0.1:50052 cas --key cas1 --value v2 --expected 0
|
[[ "${CAS_CONFLICT}" == Conflict!* ]]
|
||||||
set -e
|
|
||||||
|
|
||||||
echo "Verification Complete!"
|
echo "Verification Complete!"
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,17 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# Run key Multi-Raft test suites.
|
if [[ -z "${IN_NIX_SHELL:-}" ]] && command -v nix >/dev/null 2>&1; then
|
||||||
echo "[verify] Running multi-region routing tests..."
|
exec nix develop -c "$0" "$@"
|
||||||
nix develop -c cargo test -q rdb-server::tests::test_multi_region
|
fi
|
||||||
|
|
||||||
echo "[verify] Running split tests..."
|
echo "[verify] Running persistent snapshot recovery tests..."
|
||||||
nix develop -c cargo test -q rdb-server::tests::test_split
|
cargo test -p flaredb-raft persistent_storage::tests::test_snapshot_persistence_and_recovery
|
||||||
|
|
||||||
echo "[verify] Running confchange/move tests..."
|
echo "[verify] Running leader election tests..."
|
||||||
nix develop -c cargo test -q rdb-server::tests::test_confchange_move
|
cargo test -p flaredb-raft raft_node::tests::test_leader_election
|
||||||
|
|
||||||
|
echo "[verify] Running server read-path tests..."
|
||||||
|
cargo test -p flaredb-server service::tests::scan_returns_decoded_cas_keys
|
||||||
|
|
||||||
echo "[verify] Done."
|
echo "[verify] Done."
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,23 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
if [[ -z "${IN_NIX_SHELL:-}" ]] && command -v nix >/dev/null 2>&1; then
|
||||||
|
exec nix develop -c "$0" "$@"
|
||||||
|
fi
|
||||||
|
|
||||||
export LIBCLANG_PATH=${LIBCLANG_PATH:-/nix/store/0zn99g048j67syaq97rczq5z0j8dsvc8-clang-21.1.2-lib/lib}
|
export LIBCLANG_PATH=${LIBCLANG_PATH:-/nix/store/0zn99g048j67syaq97rczq5z0j8dsvc8-clang-21.1.2-lib/lib}
|
||||||
|
|
||||||
echo "[verify] formatting..."
|
echo "[verify] formatting..."
|
||||||
cargo fmt --all
|
if ! find . \
|
||||||
|
-path ./target -prune -o \
|
||||||
|
-name '*.rs' -print0 | xargs -0 rustfmt --check; then
|
||||||
|
echo "[verify] rustfmt drift detected; continuing with runtime tests" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
echo "[verify] running rdb-server tests..."
|
echo "[verify] running FlareDB server tests..."
|
||||||
nix-shell -p protobuf --run "LIBCLANG_PATH=${LIBCLANG_PATH} cargo test -p rdb-server --tests"
|
cargo test -p flaredb-server --tests
|
||||||
|
|
||||||
|
echo "[verify] running FlareDB raft tests..."
|
||||||
|
cargo test -p flaredb-raft
|
||||||
|
|
||||||
echo "[verify] done."
|
echo "[verify] done."
|
||||||
|
|
|
||||||
|
|
@ -1,40 +1,103 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -e
|
set -euo pipefail
|
||||||
|
|
||||||
|
if [[ -z "${IN_NIX_SHELL:-}" ]] && command -v nix >/dev/null 2>&1; then
|
||||||
|
exec nix develop -c "$0" "$@"
|
||||||
|
fi
|
||||||
|
|
||||||
|
WORKDIR=$(mktemp -d)
|
||||||
|
PD_LOG="${WORKDIR}/flaredb-pd.log"
|
||||||
|
S1_LOG="${WORKDIR}/flaredb-server-1.log"
|
||||||
|
S2_LOG="${WORKDIR}/flaredb-server-2.log"
|
||||||
|
|
||||||
|
run_client() {
|
||||||
|
local addr="$1"
|
||||||
|
shift
|
||||||
|
local output=""
|
||||||
|
local status=0
|
||||||
|
local attempt=0
|
||||||
|
while (( attempt < 20 )); do
|
||||||
|
if output=$(cargo run --quiet --bin flaredb-client -- --addr "${addr}" --pd-addr 127.0.0.1:2479 "$@" 2>&1); then
|
||||||
|
printf '%s\n' "${output}" | awk 'NF { last = $0 } END { print last }'
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
status=$?
|
||||||
|
attempt=$((attempt + 1))
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
printf '%s\n' "${output}" >&2
|
||||||
|
return "${status}"
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
local exit_code=$?
|
||||||
|
if [[ -n "${PD_PID:-}" ]]; then
|
||||||
|
kill "${PD_PID}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
if [[ -n "${S1_PID:-}" ]]; then
|
||||||
|
kill "${S1_PID}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
if [[ -n "${S2_PID:-}" ]]; then
|
||||||
|
kill "${S2_PID}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
if (( exit_code != 0 )); then
|
||||||
|
echo "verify-sharding failed; logs preserved at ${WORKDIR}" >&2
|
||||||
|
[[ -f "${PD_LOG}" ]] && { echo "--- ${PD_LOG} ---" >&2; tail -n 200 "${PD_LOG}" >&2; }
|
||||||
|
[[ -f "${S1_LOG}" ]] && { echo "--- ${S1_LOG} ---" >&2; tail -n 200 "${S1_LOG}" >&2; }
|
||||||
|
[[ -f "${S2_LOG}" ]] && { echo "--- ${S2_LOG} ---" >&2; tail -n 200 "${S2_LOG}" >&2; }
|
||||||
|
return "${exit_code}"
|
||||||
|
fi
|
||||||
|
rm -rf "${WORKDIR}"
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
echo "Building workspace..."
|
echo "Building workspace..."
|
||||||
cargo build
|
cargo build
|
||||||
|
|
||||||
echo "Starting PD..."
|
echo "Starting PD..."
|
||||||
cargo run --bin rdb-pd -- --addr 127.0.0.1:2479 &
|
cargo run --bin flaredb-pd -- --addr 127.0.0.1:2479 >"${PD_LOG}" 2>&1 &
|
||||||
PD_PID=$!
|
PD_PID=$!
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
echo "Starting Server 1 (127.0.0.1:50001, data1)..."
|
echo "Starting Server 1 (127.0.0.1:50001, data1)..."
|
||||||
# Port 50001
|
cargo run --bin flaredb-server -- \
|
||||||
cargo run --bin rdb-server -- --addr 127.0.0.1:50001 --data-dir data1 --pd-addr 127.0.0.1:2479 &
|
--store-id 1 \
|
||||||
|
--addr 127.0.0.1:50001 \
|
||||||
|
--http-addr 127.0.0.1:8083 \
|
||||||
|
--data-dir "${WORKDIR}/data1" \
|
||||||
|
--pd-addr 127.0.0.1:2479 \
|
||||||
|
--metrics-port 9093 \
|
||||||
|
--namespace-mode raw=eventual \
|
||||||
|
>"${S1_LOG}" 2>&1 &
|
||||||
S1_PID=$!
|
S1_PID=$!
|
||||||
|
sleep 4
|
||||||
|
|
||||||
echo "Starting Server 2 (127.0.0.1:50002, data2)..."
|
echo "Starting Server 2 (127.0.0.1:50002, data2)..."
|
||||||
# Port 50002
|
cargo run --bin flaredb-server -- \
|
||||||
cargo run --bin rdb-server -- --addr 127.0.0.1:50002 --data-dir data2 --pd-addr 127.0.0.1:2479 &
|
--store-id 2 \
|
||||||
|
--addr 127.0.0.1:50002 \
|
||||||
|
--http-addr 127.0.0.1:8084 \
|
||||||
|
--data-dir "${WORKDIR}/data2" \
|
||||||
|
--pd-addr 127.0.0.1:2479 \
|
||||||
|
--metrics-port 9094 \
|
||||||
|
--namespace-mode raw=eventual \
|
||||||
|
>"${S2_LOG}" 2>&1 &
|
||||||
S2_PID=$!
|
S2_PID=$!
|
||||||
|
|
||||||
sleep 5 # Wait for registration
|
sleep 5 # Wait for registration and leader routing to settle
|
||||||
|
|
||||||
echo "Running Client Verification (Sharding)..."
|
echo "Running Client Verification (multi-node routing smoke)..."
|
||||||
|
|
||||||
# Put 'a' (Should go to S1)
|
|
||||||
echo "Testing Put 'a'..."
|
echo "Testing Put 'a'..."
|
||||||
cargo run --bin rdb-client -- --addr 127.0.0.1:50001 --pd-addr 127.0.0.1:2479 raw-put --key a --value val_a
|
run_client 127.0.0.1:50001 --namespace raw raw-put --key a --value val_a >/dev/null
|
||||||
|
|
||||||
# Put 'z' (Should go to S2)
|
|
||||||
echo "Testing Put 'z'..."
|
echo "Testing Put 'z'..."
|
||||||
cargo run --bin rdb-client -- --addr 127.0.0.1:50001 --pd-addr 127.0.0.1:2479 raw-put --key z --value val_z
|
run_client 127.0.0.1:50002 --namespace raw raw-put --key z --value val_z >/dev/null
|
||||||
|
|
||||||
# Cleanup
|
echo "Testing reads from both nodes..."
|
||||||
kill $PD_PID
|
VALUE_A=$(run_client 127.0.0.1:50002 --namespace raw raw-get --key a)
|
||||||
kill $S1_PID
|
VALUE_Z=$(run_client 127.0.0.1:50001 --namespace raw raw-get --key z)
|
||||||
kill $S2_PID
|
[[ "${VALUE_A}" == "val_a" ]]
|
||||||
rm -rf data1 data2
|
[[ "${VALUE_Z}" == "val_z" ]]
|
||||||
|
|
||||||
echo "Sharding Verification Complete!"
|
echo "Sharding Verification Complete!"
|
||||||
|
|
|
||||||
607
flashdns/Cargo.lock
generated
607
flashdns/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
621
iam/Cargo.lock
generated
621
iam/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -23,6 +23,9 @@ prost = { workspace = true }
|
||||||
base64 = { workspace = true }
|
base64 = { workspace = true }
|
||||||
sha2 = { workspace = true }
|
sha2 = { workspace = true }
|
||||||
uuid = { workspace = true }
|
uuid = { workspace = true }
|
||||||
|
aes-gcm = "0.10"
|
||||||
|
argon2 = "0.5"
|
||||||
|
rand_core = "0.6"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tokio = { workspace = true, features = ["full", "test-util"] }
|
tokio = { workspace = true, features = ["full", "test-util"] }
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,12 @@ use rand_core::{OsRng, RngCore};
|
||||||
use tonic::{Request, Response, Status};
|
use tonic::{Request, Response, Status};
|
||||||
|
|
||||||
use iam_store::CredentialStore;
|
use iam_store::CredentialStore;
|
||||||
use iam_types::{Argon2Params, CredentialRecord};
|
use iam_types::{Argon2Params, CredentialRecord, PrincipalKind as TypesPrincipalKind};
|
||||||
|
|
||||||
use crate::proto::{
|
use crate::proto::{
|
||||||
iam_credential_server::IamCredential, CreateS3CredentialRequest,
|
iam_credential_server::IamCredential, CreateS3CredentialRequest,
|
||||||
CreateS3CredentialResponse, Credential, GetSecretKeyRequest, GetSecretKeyResponse,
|
CreateS3CredentialResponse, Credential, GetSecretKeyRequest, GetSecretKeyResponse,
|
||||||
ListCredentialsRequest, ListCredentialsResponse, RevokeCredentialRequest,
|
ListCredentialsRequest, ListCredentialsResponse, PrincipalKind, RevokeCredentialRequest,
|
||||||
RevokeCredentialResponse,
|
RevokeCredentialResponse,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -95,6 +95,15 @@ impl IamCredentialService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn map_principal_kind(kind: i32) -> Result<TypesPrincipalKind, Status> {
|
||||||
|
match PrincipalKind::try_from(kind).unwrap_or(PrincipalKind::Unspecified) {
|
||||||
|
PrincipalKind::User => Ok(TypesPrincipalKind::User),
|
||||||
|
PrincipalKind::ServiceAccount => Ok(TypesPrincipalKind::ServiceAccount),
|
||||||
|
PrincipalKind::Group => Ok(TypesPrincipalKind::Group),
|
||||||
|
PrincipalKind::Unspecified => Err(Status::invalid_argument("principal_kind is required")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[tonic::async_trait]
|
#[tonic::async_trait]
|
||||||
impl IamCredential for IamCredentialService {
|
impl IamCredential for IamCredentialService {
|
||||||
async fn create_s3_credential(
|
async fn create_s3_credential(
|
||||||
|
|
@ -103,6 +112,7 @@ impl IamCredential for IamCredentialService {
|
||||||
) -> Result<Response<CreateS3CredentialResponse>, Status> {
|
) -> Result<Response<CreateS3CredentialResponse>, Status> {
|
||||||
let req = request.into_inner();
|
let req = request.into_inner();
|
||||||
let now = now_ts();
|
let now = now_ts();
|
||||||
|
let principal_kind = map_principal_kind(req.principal_kind)?;
|
||||||
let (secret_b64, raw_secret) = Self::generate_secret();
|
let (secret_b64, raw_secret) = Self::generate_secret();
|
||||||
let (hash, kdf) = Self::hash_secret(&raw_secret);
|
let (hash, kdf) = Self::hash_secret(&raw_secret);
|
||||||
let secret_enc = self.encrypt_secret(&raw_secret)?;
|
let secret_enc = self.encrypt_secret(&raw_secret)?;
|
||||||
|
|
@ -111,6 +121,9 @@ impl IamCredential for IamCredentialService {
|
||||||
let record = CredentialRecord {
|
let record = CredentialRecord {
|
||||||
access_key_id: access_key_id.clone(),
|
access_key_id: access_key_id.clone(),
|
||||||
principal_id: req.principal_id.clone(),
|
principal_id: req.principal_id.clone(),
|
||||||
|
principal_kind,
|
||||||
|
org_id: req.org_id.clone(),
|
||||||
|
project_id: req.project_id.clone(),
|
||||||
created_at: now,
|
created_at: now,
|
||||||
expires_at: req.expires_at,
|
expires_at: req.expires_at,
|
||||||
revoked: false,
|
revoked: false,
|
||||||
|
|
@ -168,6 +181,13 @@ impl IamCredential for IamCredentialService {
|
||||||
secret_key: STANDARD.encode(secret),
|
secret_key: STANDARD.encode(secret),
|
||||||
principal_id: record.principal_id,
|
principal_id: record.principal_id,
|
||||||
expires_at: record.expires_at,
|
expires_at: record.expires_at,
|
||||||
|
org_id: record.org_id,
|
||||||
|
project_id: record.project_id,
|
||||||
|
principal_kind: match record.principal_kind {
|
||||||
|
TypesPrincipalKind::User => PrincipalKind::User as i32,
|
||||||
|
TypesPrincipalKind::ServiceAccount => PrincipalKind::ServiceAccount as i32,
|
||||||
|
TypesPrincipalKind::Group => PrincipalKind::Group as i32,
|
||||||
|
},
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -190,6 +210,13 @@ impl IamCredential for IamCredentialService {
|
||||||
expires_at: c.expires_at,
|
expires_at: c.expires_at,
|
||||||
revoked: c.revoked,
|
revoked: c.revoked,
|
||||||
description: c.description.unwrap_or_default(),
|
description: c.description.unwrap_or_default(),
|
||||||
|
org_id: c.org_id,
|
||||||
|
project_id: c.project_id,
|
||||||
|
principal_kind: match c.principal_kind {
|
||||||
|
TypesPrincipalKind::User => PrincipalKind::User as i32,
|
||||||
|
TypesPrincipalKind::ServiceAccount => PrincipalKind::ServiceAccount as i32,
|
||||||
|
TypesPrincipalKind::Group => PrincipalKind::Group as i32,
|
||||||
|
},
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
Ok(Response::new(ListCredentialsResponse { credentials: creds }))
|
Ok(Response::new(ListCredentialsResponse { credentials: creds }))
|
||||||
|
|
@ -230,6 +257,9 @@ mod tests {
|
||||||
principal_id: "p1".into(),
|
principal_id: "p1".into(),
|
||||||
description: "".into(),
|
description: "".into(),
|
||||||
expires_at: None,
|
expires_at: None,
|
||||||
|
org_id: Some("org-a".into()),
|
||||||
|
project_id: Some("project-a".into()),
|
||||||
|
principal_kind: PrincipalKind::ServiceAccount as i32,
|
||||||
}))
|
}))
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
@ -247,6 +277,9 @@ mod tests {
|
||||||
let fetched = STANDARD.decode(get.secret_key).unwrap();
|
let fetched = STANDARD.decode(get.secret_key).unwrap();
|
||||||
assert_eq!(orig, fetched);
|
assert_eq!(orig, fetched);
|
||||||
assert_eq!(get.principal_id, "p1");
|
assert_eq!(get.principal_id, "p1");
|
||||||
|
assert_eq!(get.org_id.as_deref(), Some("org-a"));
|
||||||
|
assert_eq!(get.project_id.as_deref(), Some("project-a"));
|
||||||
|
assert_eq!(get.principal_kind, PrincipalKind::ServiceAccount as i32);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
|
|
@ -257,6 +290,9 @@ mod tests {
|
||||||
principal_id: "pA".into(),
|
principal_id: "pA".into(),
|
||||||
description: "".into(),
|
description: "".into(),
|
||||||
expires_at: None,
|
expires_at: None,
|
||||||
|
org_id: Some("org-a".into()),
|
||||||
|
project_id: Some("project-a".into()),
|
||||||
|
principal_kind: PrincipalKind::ServiceAccount as i32,
|
||||||
}))
|
}))
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
@ -266,6 +302,9 @@ mod tests {
|
||||||
principal_id: "pB".into(),
|
principal_id: "pB".into(),
|
||||||
description: "".into(),
|
description: "".into(),
|
||||||
expires_at: None,
|
expires_at: None,
|
||||||
|
org_id: Some("org-b".into()),
|
||||||
|
project_id: Some("project-b".into()),
|
||||||
|
principal_kind: PrincipalKind::ServiceAccount as i32,
|
||||||
}))
|
}))
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
@ -289,6 +328,9 @@ mod tests {
|
||||||
principal_id: "p1".into(),
|
principal_id: "p1".into(),
|
||||||
description: "".into(),
|
description: "".into(),
|
||||||
expires_at: None,
|
expires_at: None,
|
||||||
|
org_id: Some("org-a".into()),
|
||||||
|
project_id: Some("project-a".into()),
|
||||||
|
principal_kind: PrincipalKind::ServiceAccount as i32,
|
||||||
}))
|
}))
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
@ -297,7 +339,6 @@ mod tests {
|
||||||
let revoke1 = svc
|
let revoke1 = svc
|
||||||
.revoke_credential(Request::new(RevokeCredentialRequest {
|
.revoke_credential(Request::new(RevokeCredentialRequest {
|
||||||
access_key_id: created.access_key_id.clone(),
|
access_key_id: created.access_key_id.clone(),
|
||||||
reason: "test".into(),
|
|
||||||
}))
|
}))
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
@ -307,7 +348,6 @@ mod tests {
|
||||||
let revoke2 = svc
|
let revoke2 = svc
|
||||||
.revoke_credential(Request::new(RevokeCredentialRequest {
|
.revoke_credential(Request::new(RevokeCredentialRequest {
|
||||||
access_key_id: created.access_key_id.clone(),
|
access_key_id: created.access_key_id.clone(),
|
||||||
reason: "again".into(),
|
|
||||||
}))
|
}))
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
@ -330,6 +370,9 @@ mod tests {
|
||||||
let expired = CredentialRecord {
|
let expired = CredentialRecord {
|
||||||
access_key_id: "expired-ak".into(),
|
access_key_id: "expired-ak".into(),
|
||||||
principal_id: "p1".into(),
|
principal_id: "p1".into(),
|
||||||
|
principal_kind: TypesPrincipalKind::ServiceAccount,
|
||||||
|
org_id: Some("org-a".into()),
|
||||||
|
project_id: Some("project-a".into()),
|
||||||
created_at: now_ts(),
|
created_at: now_ts(),
|
||||||
expires_at: Some(now_ts() - 10),
|
expires_at: Some(now_ts() - 10),
|
||||||
revoked: false,
|
revoked: false,
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
mod conversions;
|
mod conversions;
|
||||||
|
mod credential_service;
|
||||||
mod gateway_auth_service;
|
mod gateway_auth_service;
|
||||||
mod generated;
|
mod generated;
|
||||||
pub mod iam_service;
|
pub mod iam_service;
|
||||||
|
|
@ -8,7 +9,10 @@ pub mod proto {
|
||||||
pub use crate::generated::iam::v1::*;
|
pub use crate::generated::iam::v1::*;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub use generated::iam::v1::{iam_admin_server, iam_authz_server, iam_token_server};
|
pub use generated::iam::v1::{
|
||||||
|
iam_admin_server, iam_authz_server, iam_credential_server, iam_token_server,
|
||||||
|
};
|
||||||
|
pub use credential_service::IamCredentialService;
|
||||||
pub use gateway_auth_service::GatewayAuthServiceImpl;
|
pub use gateway_auth_service::GatewayAuthServiceImpl;
|
||||||
pub use iam_service::{IamAdminService, IamAuthzService};
|
pub use iam_service::{IamAdminService, IamAuthzService};
|
||||||
pub use token_service::IamTokenService;
|
pub use token_service::IamTokenService;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
//!
|
//!
|
||||||
//! Provides a thin gRPC client for interacting with the IAM service.
|
//! Provides a thin gRPC client for interacting with the IAM service.
|
||||||
|
|
||||||
|
use std::future::Future;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use iam_api::proto::{
|
use iam_api::proto::{
|
||||||
|
|
@ -19,6 +20,10 @@ use iam_types::{
|
||||||
};
|
};
|
||||||
use tonic::transport::{Channel, ClientTlsConfig, Endpoint};
|
use tonic::transport::{Channel, ClientTlsConfig, Endpoint};
|
||||||
|
|
||||||
|
const TRANSIENT_RPC_RETRY_ATTEMPTS: usize = 3;
|
||||||
|
const TRANSIENT_RPC_INITIAL_BACKOFF: Duration = Duration::from_millis(200);
|
||||||
|
const TRANSIENT_RPC_MAX_BACKOFF: Duration = Duration::from_millis(1_000);
|
||||||
|
|
||||||
/// Configuration for the IAM client
|
/// Configuration for the IAM client
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct IamClientConfig {
|
pub struct IamClientConfig {
|
||||||
|
|
@ -100,6 +105,40 @@ impl IamClient {
|
||||||
IamTokenClient::new(self.channel.clone())
|
IamTokenClient::new(self.channel.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn call_with_retry<T, F, Fut>(operation: &'static str, mut op: F) -> Result<T>
|
||||||
|
where
|
||||||
|
F: FnMut() -> Fut,
|
||||||
|
Fut: Future<Output = std::result::Result<T, tonic::Status>>,
|
||||||
|
{
|
||||||
|
let mut last_status = None;
|
||||||
|
for attempt in 0..TRANSIENT_RPC_RETRY_ATTEMPTS {
|
||||||
|
match op().await {
|
||||||
|
Ok(value) => return Ok(value),
|
||||||
|
Err(status)
|
||||||
|
if attempt + 1 < TRANSIENT_RPC_RETRY_ATTEMPTS
|
||||||
|
&& is_retryable_status(&status) =>
|
||||||
|
{
|
||||||
|
let delay = retry_delay(attempt);
|
||||||
|
tracing::warn!(
|
||||||
|
operation,
|
||||||
|
attempt = attempt + 1,
|
||||||
|
retry_after_ms = delay.as_millis() as u64,
|
||||||
|
code = ?status.code(),
|
||||||
|
message = status.message(),
|
||||||
|
"retrying transient IAM RPC"
|
||||||
|
);
|
||||||
|
last_status = Some(status);
|
||||||
|
tokio::time::sleep(delay).await;
|
||||||
|
}
|
||||||
|
Err(status) => return Err(map_status(status)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(map_status(last_status.unwrap_or_else(|| {
|
||||||
|
tonic::Status::internal(format!("IAM RPC {operation} failed without a status"))
|
||||||
|
})))
|
||||||
|
}
|
||||||
|
|
||||||
// ========================================================================
|
// ========================================================================
|
||||||
// Authorization APIs
|
// Authorization APIs
|
||||||
// ========================================================================
|
// ========================================================================
|
||||||
|
|
@ -128,7 +167,6 @@ impl IamClient {
|
||||||
resource: &Resource,
|
resource: &Resource,
|
||||||
context: std::collections::HashMap<String, String>,
|
context: std::collections::HashMap<String, String>,
|
||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
let mut client = self.authz_client();
|
|
||||||
let request = AuthorizeRequest {
|
let request = AuthorizeRequest {
|
||||||
principal: Some(to_proto_principal_ref(&principal.to_ref())),
|
principal: Some(to_proto_principal_ref(&principal.to_ref())),
|
||||||
action: action.to_string(),
|
action: action.to_string(),
|
||||||
|
|
@ -151,11 +189,13 @@ impl IamClient {
|
||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("authorize", || {
|
||||||
.authorize(request)
|
let mut client = self.authz_client();
|
||||||
.await
|
let request = request.clone();
|
||||||
.map_err(map_status)?
|
async move { client.authorize(request).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
|
|
||||||
Ok(resp.allowed)
|
Ok(resp.allowed)
|
||||||
}
|
}
|
||||||
|
|
@ -166,7 +206,6 @@ impl IamClient {
|
||||||
|
|
||||||
/// Create a new user
|
/// Create a new user
|
||||||
pub async fn create_user(&self, id: &str, name: &str) -> Result<Principal> {
|
pub async fn create_user(&self, id: &str, name: &str) -> Result<Principal> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = CreatePrincipalRequest {
|
let req = CreatePrincipalRequest {
|
||||||
id: id.into(),
|
id: id.into(),
|
||||||
kind: ProtoPrincipalKind::User as i32,
|
kind: ProtoPrincipalKind::User as i32,
|
||||||
|
|
@ -177,25 +216,31 @@ impl IamClient {
|
||||||
metadata: Default::default(),
|
metadata: Default::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("create_principal", || {
|
||||||
.create_principal(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.create_principal(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(ProtoPrincipal::into(resp))
|
Ok(ProtoPrincipal::into(resp))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a principal
|
/// Get a principal
|
||||||
pub async fn get_principal(&self, principal_ref: &PrincipalRef) -> Result<Option<Principal>> {
|
pub async fn get_principal(&self, principal_ref: &PrincipalRef) -> Result<Option<Principal>> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = GetPrincipalRequest {
|
let req = GetPrincipalRequest {
|
||||||
principal: Some(to_proto_principal_ref(principal_ref)),
|
principal: Some(to_proto_principal_ref(principal_ref)),
|
||||||
};
|
};
|
||||||
let resp = client.get_principal(req).await;
|
let resp = Self::call_with_retry("get_principal", || {
|
||||||
|
let mut client = self.admin_client();
|
||||||
|
let req = req.clone();
|
||||||
|
async move { client.get_principal(req).await }
|
||||||
|
})
|
||||||
|
.await;
|
||||||
match resp {
|
match resp {
|
||||||
Ok(r) => Ok(Some(ProtoPrincipal::into(r.into_inner()))),
|
Ok(r) => Ok(Some(ProtoPrincipal::into(r.into_inner()))),
|
||||||
Err(status) if status.code() == tonic::Code::NotFound => Ok(None),
|
Err(Error::Internal(message)) if tonic_not_found(&message) => Ok(None),
|
||||||
Err(status) => Err(map_status(status)),
|
Err(err) => Err(err),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -206,7 +251,6 @@ impl IamClient {
|
||||||
name: &str,
|
name: &str,
|
||||||
project_id: &str,
|
project_id: &str,
|
||||||
) -> Result<Principal> {
|
) -> Result<Principal> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = CreatePrincipalRequest {
|
let req = CreatePrincipalRequest {
|
||||||
id: id.into(),
|
id: id.into(),
|
||||||
kind: ProtoPrincipalKind::ServiceAccount as i32,
|
kind: ProtoPrincipalKind::ServiceAccount as i32,
|
||||||
|
|
@ -216,17 +260,18 @@ impl IamClient {
|
||||||
email: None,
|
email: None,
|
||||||
metadata: Default::default(),
|
metadata: Default::default(),
|
||||||
};
|
};
|
||||||
let resp = client
|
let resp = Self::call_with_retry("create_service_account", || {
|
||||||
.create_principal(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.create_principal(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(ProtoPrincipal::into(resp))
|
Ok(ProtoPrincipal::into(resp))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// List users
|
/// List users
|
||||||
pub async fn list_users(&self) -> Result<Vec<Principal>> {
|
pub async fn list_users(&self) -> Result<Vec<Principal>> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = ListPrincipalsRequest {
|
let req = ListPrincipalsRequest {
|
||||||
kind: Some(ProtoPrincipalKind::User as i32),
|
kind: Some(ProtoPrincipalKind::User as i32),
|
||||||
org_id: None,
|
org_id: None,
|
||||||
|
|
@ -235,11 +280,13 @@ impl IamClient {
|
||||||
page_token: String::new(),
|
page_token: String::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("list_principals", || {
|
||||||
.list_principals(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.list_principals(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
|
|
||||||
Ok(resp
|
Ok(resp
|
||||||
.principals
|
.principals
|
||||||
|
|
@ -254,36 +301,40 @@ impl IamClient {
|
||||||
|
|
||||||
/// Get a role by name
|
/// Get a role by name
|
||||||
pub async fn get_role(&self, name: &str) -> Result<Option<Role>> {
|
pub async fn get_role(&self, name: &str) -> Result<Option<Role>> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = GetRoleRequest { name: name.into() };
|
let req = GetRoleRequest { name: name.into() };
|
||||||
let resp = client.get_role(req).await;
|
let resp = Self::call_with_retry("get_role", || {
|
||||||
|
let mut client = self.admin_client();
|
||||||
|
let req = req.clone();
|
||||||
|
async move { client.get_role(req).await }
|
||||||
|
})
|
||||||
|
.await;
|
||||||
match resp {
|
match resp {
|
||||||
Ok(r) => Ok(Some(r.into_inner().into())),
|
Ok(r) => Ok(Some(r.into_inner().into())),
|
||||||
Err(status) if status.code() == tonic::Code::NotFound => Ok(None),
|
Err(Error::Internal(message)) if tonic_not_found(&message) => Ok(None),
|
||||||
Err(status) => Err(map_status(status)),
|
Err(err) => Err(err),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// List all roles
|
/// List all roles
|
||||||
pub async fn list_roles(&self) -> Result<Vec<Role>> {
|
pub async fn list_roles(&self) -> Result<Vec<Role>> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = ListRolesRequest {
|
let req = ListRolesRequest {
|
||||||
scope: None,
|
scope: None,
|
||||||
include_builtin: true,
|
include_builtin: true,
|
||||||
page_size: 0,
|
page_size: 0,
|
||||||
page_token: String::new(),
|
page_token: String::new(),
|
||||||
};
|
};
|
||||||
let resp = client
|
let resp = Self::call_with_retry("list_roles", || {
|
||||||
.list_roles(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.list_roles(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(resp.roles.into_iter().map(Into::into).collect())
|
Ok(resp.roles.into_iter().map(Into::into).collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a custom role
|
/// Create a custom role
|
||||||
pub async fn create_role(&self, role: &Role) -> Result<Role> {
|
pub async fn create_role(&self, role: &Role) -> Result<Role> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = CreateRoleRequest {
|
let req = CreateRoleRequest {
|
||||||
name: role.name.clone(),
|
name: role.name.clone(),
|
||||||
display_name: role.display_name.clone(),
|
display_name: role.display_name.clone(),
|
||||||
|
|
@ -297,11 +348,13 @@ impl IamClient {
|
||||||
.collect(),
|
.collect(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("create_role", || {
|
||||||
.create_role(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.create_role(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(resp.into())
|
Ok(resp.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -311,7 +364,6 @@ impl IamClient {
|
||||||
|
|
||||||
/// Create a policy binding
|
/// Create a policy binding
|
||||||
pub async fn create_binding(&self, binding: &PolicyBinding) -> Result<PolicyBinding> {
|
pub async fn create_binding(&self, binding: &PolicyBinding) -> Result<PolicyBinding> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = CreateBindingRequest {
|
let req = CreateBindingRequest {
|
||||||
principal: Some(to_proto_principal_ref(&binding.principal_ref)),
|
principal: Some(to_proto_principal_ref(&binding.principal_ref)),
|
||||||
role: binding.role_ref.clone(),
|
role: binding.role_ref.clone(),
|
||||||
|
|
@ -320,25 +372,28 @@ impl IamClient {
|
||||||
expires_at: binding.expires_at,
|
expires_at: binding.expires_at,
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("create_binding", || {
|
||||||
.create_binding(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.create_binding(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(resp.into())
|
Ok(resp.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Delete a policy binding
|
/// Delete a policy binding
|
||||||
pub async fn delete_binding(&self, binding_id: &str) -> Result<bool> {
|
pub async fn delete_binding(&self, binding_id: &str) -> Result<bool> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = DeleteBindingRequest {
|
let req = DeleteBindingRequest {
|
||||||
id: binding_id.into(),
|
id: binding_id.into(),
|
||||||
};
|
};
|
||||||
let resp = client
|
let resp = Self::call_with_retry("delete_binding", || {
|
||||||
.delete_binding(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.delete_binding(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(resp.deleted)
|
Ok(resp.deleted)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -347,7 +402,6 @@ impl IamClient {
|
||||||
&self,
|
&self,
|
||||||
principal: &PrincipalRef,
|
principal: &PrincipalRef,
|
||||||
) -> Result<Vec<PolicyBinding>> {
|
) -> Result<Vec<PolicyBinding>> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = ListBindingsRequest {
|
let req = ListBindingsRequest {
|
||||||
principal: Some(to_proto_principal_ref(principal)),
|
principal: Some(to_proto_principal_ref(principal)),
|
||||||
role: None,
|
role: None,
|
||||||
|
|
@ -357,17 +411,18 @@ impl IamClient {
|
||||||
page_token: String::new(),
|
page_token: String::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("list_bindings_for_principal", || {
|
||||||
.list_bindings(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.list_bindings(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(resp.bindings.into_iter().map(Into::into).collect())
|
Ok(resp.bindings.into_iter().map(Into::into).collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// List bindings for a scope
|
/// List bindings for a scope
|
||||||
pub async fn list_bindings_for_scope(&self, scope: &Scope) -> Result<Vec<PolicyBinding>> {
|
pub async fn list_bindings_for_scope(&self, scope: &Scope) -> Result<Vec<PolicyBinding>> {
|
||||||
let mut client = self.admin_client();
|
|
||||||
let req = ListBindingsRequest {
|
let req = ListBindingsRequest {
|
||||||
principal: None,
|
principal: None,
|
||||||
role: None,
|
role: None,
|
||||||
|
|
@ -377,11 +432,13 @@ impl IamClient {
|
||||||
page_token: String::new(),
|
page_token: String::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("list_bindings_for_scope", || {
|
||||||
.list_bindings(req)
|
let mut client = self.admin_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.list_bindings(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(resp.bindings.into_iter().map(Into::into).collect())
|
Ok(resp.bindings.into_iter().map(Into::into).collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -397,7 +454,6 @@ impl IamClient {
|
||||||
scope: Scope,
|
scope: Scope,
|
||||||
ttl_seconds: u64,
|
ttl_seconds: u64,
|
||||||
) -> Result<String> {
|
) -> Result<String> {
|
||||||
let mut client = self.token_client();
|
|
||||||
let req = IssueTokenRequest {
|
let req = IssueTokenRequest {
|
||||||
principal_id: principal.id.clone(),
|
principal_id: principal.id.clone(),
|
||||||
principal_kind: match principal.kind {
|
principal_kind: match principal.kind {
|
||||||
|
|
@ -410,25 +466,28 @@ impl IamClient {
|
||||||
ttl_seconds,
|
ttl_seconds,
|
||||||
};
|
};
|
||||||
|
|
||||||
let resp = client
|
let resp = Self::call_with_retry("issue_token", || {
|
||||||
.issue_token(req)
|
let mut client = self.token_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.issue_token(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(resp.token)
|
Ok(resp.token)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Validate a token
|
/// Validate a token
|
||||||
pub async fn validate_token(&self, token: &str) -> Result<InternalTokenClaims> {
|
pub async fn validate_token(&self, token: &str) -> Result<InternalTokenClaims> {
|
||||||
let mut client = self.token_client();
|
|
||||||
let req = ValidateTokenRequest {
|
let req = ValidateTokenRequest {
|
||||||
token: token.to_string(),
|
token: token.to_string(),
|
||||||
};
|
};
|
||||||
let resp = client
|
let resp = Self::call_with_retry("validate_token", || {
|
||||||
.validate_token(req)
|
let mut client = self.token_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.validate_token(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
|
|
||||||
if !resp.valid {
|
if !resp.valid {
|
||||||
return Err(Error::Iam(IamError::InvalidToken(resp.reason.clone())));
|
return Err(Error::Iam(IamError::InvalidToken(resp.reason.clone())));
|
||||||
|
|
@ -479,20 +538,55 @@ impl IamClient {
|
||||||
|
|
||||||
/// Revoke a token
|
/// Revoke a token
|
||||||
pub async fn revoke_token(&self, token: &str) -> Result<()> {
|
pub async fn revoke_token(&self, token: &str) -> Result<()> {
|
||||||
let mut client = self.token_client();
|
|
||||||
let req = RevokeTokenRequest {
|
let req = RevokeTokenRequest {
|
||||||
token: token.to_string(),
|
token: token.to_string(),
|
||||||
reason: "client revoke".into(),
|
reason: "client revoke".into(),
|
||||||
};
|
};
|
||||||
client
|
Self::call_with_retry("revoke_token", || {
|
||||||
.revoke_token(req)
|
let mut client = self.token_client();
|
||||||
.await
|
let req = req.clone();
|
||||||
.map_err(map_status)?
|
async move { client.revoke_token(req).await }
|
||||||
.into_inner();
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn retry_delay(attempt: usize) -> Duration {
|
||||||
|
TRANSIENT_RPC_INITIAL_BACKOFF
|
||||||
|
.saturating_mul(1u32 << attempt.min(3))
|
||||||
|
.min(TRANSIENT_RPC_MAX_BACKOFF)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_retryable_status(status: &tonic::Status) -> bool {
|
||||||
|
matches!(
|
||||||
|
status.code(),
|
||||||
|
tonic::Code::Unavailable
|
||||||
|
| tonic::Code::Cancelled
|
||||||
|
| tonic::Code::DeadlineExceeded
|
||||||
|
| tonic::Code::Unknown
|
||||||
|
) || retryable_message(status.message())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn retryable_message(message: &str) -> bool {
|
||||||
|
let lower = message.to_ascii_lowercase();
|
||||||
|
[
|
||||||
|
"transport error",
|
||||||
|
"connection was not ready",
|
||||||
|
"h2 protocol error",
|
||||||
|
"broken pipe",
|
||||||
|
"connection refused",
|
||||||
|
"connection reset",
|
||||||
|
]
|
||||||
|
.iter()
|
||||||
|
.any(|needle| lower.contains(needle))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tonic_not_found(message: &str) -> bool {
|
||||||
|
message.contains("status: NotFound") || message.contains("code: NotFound")
|
||||||
|
}
|
||||||
|
|
||||||
fn map_status(status: tonic::Status) -> Error {
|
fn map_status(status: tonic::Status) -> Error {
|
||||||
Error::Internal(status.to_string())
|
Error::Internal(status.to_string())
|
||||||
}
|
}
|
||||||
|
|
@ -507,3 +601,75 @@ fn to_proto_principal_ref(principal_ref: &PrincipalRef) -> ProtoPrincipalRef {
|
||||||
id: principal_ref.id.clone(),
|
id: principal_ref.id.clone(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::sync::{
|
||||||
|
atomic::{AtomicUsize, Ordering},
|
||||||
|
Arc,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn retryable_message_covers_connection_readiness() {
|
||||||
|
assert!(retryable_message("transport error"));
|
||||||
|
assert!(retryable_message("connection was not ready"));
|
||||||
|
assert!(retryable_message("h2 protocol error"));
|
||||||
|
assert!(!retryable_message("permission denied"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn retry_delay_is_capped() {
|
||||||
|
assert_eq!(retry_delay(0), Duration::from_millis(200));
|
||||||
|
assert_eq!(retry_delay(1), Duration::from_millis(400));
|
||||||
|
assert_eq!(retry_delay(2), Duration::from_millis(800));
|
||||||
|
assert_eq!(retry_delay(3), Duration::from_millis(1000));
|
||||||
|
assert_eq!(retry_delay(7), Duration::from_millis(1000));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(start_paused = true)]
|
||||||
|
async fn call_with_retry_retries_transient_statuses() {
|
||||||
|
let attempts = Arc::new(AtomicUsize::new(0));
|
||||||
|
let attempts_for_task = attempts.clone();
|
||||||
|
let task = tokio::spawn(async move {
|
||||||
|
IamClient::call_with_retry("test", || {
|
||||||
|
let attempts = attempts_for_task.clone();
|
||||||
|
async move {
|
||||||
|
let attempt = attempts.fetch_add(1, Ordering::SeqCst);
|
||||||
|
if attempt < 2 {
|
||||||
|
Err(tonic::Status::unavailable("connection was not ready"))
|
||||||
|
} else {
|
||||||
|
Ok("ok")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
});
|
||||||
|
|
||||||
|
tokio::time::advance(Duration::from_secs(3)).await;
|
||||||
|
assert_eq!(task.await.unwrap().unwrap(), "ok");
|
||||||
|
assert_eq!(attempts.load(Ordering::SeqCst), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(start_paused = true)]
|
||||||
|
async fn call_with_retry_stops_on_non_retryable_status() {
|
||||||
|
let attempts = Arc::new(AtomicUsize::new(0));
|
||||||
|
let attempts_for_task = attempts.clone();
|
||||||
|
|
||||||
|
let err = IamClient::call_with_retry("test", || {
|
||||||
|
let attempts = attempts_for_task.clone();
|
||||||
|
async move {
|
||||||
|
attempts.fetch_add(1, Ordering::SeqCst);
|
||||||
|
Err::<(), _>(tonic::Status::permission_denied("nope"))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap_err();
|
||||||
|
|
||||||
|
assert_eq!(attempts.load(Ordering::SeqCst), 1);
|
||||||
|
match err {
|
||||||
|
Error::Internal(message) => assert!(message.contains("PermissionDenied")),
|
||||||
|
other => panic!("unexpected error: {other:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,12 +20,15 @@ use tracing::{info, warn};
|
||||||
|
|
||||||
use iam_api::{
|
use iam_api::{
|
||||||
iam_admin_server::IamAdminServer, iam_authz_server::IamAuthzServer,
|
iam_admin_server::IamAdminServer, iam_authz_server::IamAuthzServer,
|
||||||
iam_token_server::IamTokenServer, GatewayAuthServiceImpl, GatewayAuthServiceServer,
|
iam_credential_server::IamCredentialServer, iam_token_server::IamTokenServer,
|
||||||
IamAdminService, IamAuthzService, IamTokenService,
|
GatewayAuthServiceImpl, GatewayAuthServiceServer, IamAdminService, IamAuthzService,
|
||||||
|
IamCredentialService, IamTokenService,
|
||||||
};
|
};
|
||||||
use iam_authn::{InternalTokenConfig, InternalTokenService, SigningKey};
|
use iam_authn::{InternalTokenConfig, InternalTokenService, SigningKey};
|
||||||
use iam_authz::{PolicyCache, PolicyCacheConfig, PolicyEvaluator};
|
use iam_authz::{PolicyCache, PolicyCacheConfig, PolicyEvaluator};
|
||||||
use iam_store::{Backend, BackendConfig, BindingStore, PrincipalStore, RoleStore, TokenStore};
|
use iam_store::{
|
||||||
|
Backend, BackendConfig, BindingStore, CredentialStore, PrincipalStore, RoleStore, TokenStore,
|
||||||
|
};
|
||||||
|
|
||||||
use config::{BackendKind, ServerConfig};
|
use config::{BackendKind, ServerConfig};
|
||||||
|
|
||||||
|
|
@ -190,6 +193,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let principal_store = Arc::new(PrincipalStore::new(backend.clone()));
|
let principal_store = Arc::new(PrincipalStore::new(backend.clone()));
|
||||||
let role_store = Arc::new(RoleStore::new(backend.clone()));
|
let role_store = Arc::new(RoleStore::new(backend.clone()));
|
||||||
let binding_store = Arc::new(BindingStore::new(backend.clone()));
|
let binding_store = Arc::new(BindingStore::new(backend.clone()));
|
||||||
|
let credential_store = Arc::new(CredentialStore::new(backend.clone()));
|
||||||
let token_store = Arc::new(TokenStore::new(backend.clone()));
|
let token_store = Arc::new(TokenStore::new(backend.clone()));
|
||||||
|
|
||||||
// Initialize builtin roles
|
// Initialize builtin roles
|
||||||
|
|
@ -238,7 +242,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
let token_config = InternalTokenConfig::new(signing_key, &config.authn.internal_token.issuer)
|
let token_config =
|
||||||
|
InternalTokenConfig::new(signing_key.clone(), &config.authn.internal_token.issuer)
|
||||||
.with_default_ttl(Duration::from_secs(
|
.with_default_ttl(Duration::from_secs(
|
||||||
config.authn.internal_token.default_ttl_seconds,
|
config.authn.internal_token.default_ttl_seconds,
|
||||||
))
|
))
|
||||||
|
|
@ -248,6 +253,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
|
||||||
let token_service = Arc::new(InternalTokenService::new(token_config));
|
let token_service = Arc::new(InternalTokenService::new(token_config));
|
||||||
let admin_token = load_admin_token();
|
let admin_token = load_admin_token();
|
||||||
|
let credential_master_key = std::env::var("IAM_CRED_MASTER_KEY")
|
||||||
|
.ok()
|
||||||
|
.map(|value| value.into_bytes())
|
||||||
|
.filter(|value| value.len() == 32)
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
warn!(
|
||||||
|
"IAM_CRED_MASTER_KEY missing or not 32 bytes, deriving credential key from signing key",
|
||||||
|
);
|
||||||
|
signing_key.sign(b"iam-credential-master-key")
|
||||||
|
});
|
||||||
|
|
||||||
// Create gRPC services
|
// Create gRPC services
|
||||||
let authz_service = IamAuthzService::new(evaluator.clone(), principal_store.clone());
|
let authz_service = IamAuthzService::new(evaluator.clone(), principal_store.clone());
|
||||||
|
|
@ -262,6 +277,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
token_store.clone(),
|
token_store.clone(),
|
||||||
evaluator.clone(),
|
evaluator.clone(),
|
||||||
);
|
);
|
||||||
|
let credential_service =
|
||||||
|
IamCredentialService::new(credential_store, &credential_master_key, "iam-cred-master")
|
||||||
|
.map_err(|e| format!("Failed to initialize credential service: {}", e))?;
|
||||||
let admin_service = IamAdminService::new(
|
let admin_service = IamAdminService::new(
|
||||||
principal_store.clone(),
|
principal_store.clone(),
|
||||||
role_store.clone(),
|
role_store.clone(),
|
||||||
|
|
@ -291,6 +309,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
health_reporter
|
health_reporter
|
||||||
.set_serving::<IamTokenServer<IamTokenService>>()
|
.set_serving::<IamTokenServer<IamTokenService>>()
|
||||||
.await;
|
.await;
|
||||||
|
health_reporter
|
||||||
|
.set_serving::<IamCredentialServer<IamCredentialService>>()
|
||||||
|
.await;
|
||||||
health_reporter
|
health_reporter
|
||||||
.set_serving::<IamAdminServer<IamAdminService>>()
|
.set_serving::<IamAdminServer<IamAdminService>>()
|
||||||
.await;
|
.await;
|
||||||
|
|
@ -357,6 +378,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
.add_service(health_service)
|
.add_service(health_service)
|
||||||
.add_service(IamAuthzServer::new(authz_service))
|
.add_service(IamAuthzServer::new(authz_service))
|
||||||
.add_service(IamTokenServer::new(token_grpc_service))
|
.add_service(IamTokenServer::new(token_grpc_service))
|
||||||
|
.add_service(IamCredentialServer::new(credential_service))
|
||||||
.add_service(GatewayAuthServiceServer::new(gateway_auth_service))
|
.add_service(GatewayAuthServiceServer::new(gateway_auth_service))
|
||||||
.add_service(admin_server)
|
.add_service(admin_server)
|
||||||
.serve(config.server.addr);
|
.serve(config.server.addr);
|
||||||
|
|
|
||||||
|
|
@ -9,5 +9,6 @@ iam-client = { path = "../iam-client" }
|
||||||
iam-types = { path = "../iam-types" }
|
iam-types = { path = "../iam-types" }
|
||||||
tonic = { workspace = true }
|
tonic = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
|
tokio = { workspace = true }
|
||||||
http = "1"
|
http = "1"
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,9 @@ use tracing::{debug, warn};
|
||||||
const PHOTON_AUTH_TOKEN_HEADER: &str = "x-photon-auth-token";
|
const PHOTON_AUTH_TOKEN_HEADER: &str = "x-photon-auth-token";
|
||||||
const DEFAULT_TOKEN_CACHE_TTL_MS: u64 = 5_000;
|
const DEFAULT_TOKEN_CACHE_TTL_MS: u64 = 5_000;
|
||||||
const DEFAULT_AUTHZ_CACHE_TTL_MS: u64 = 3_000;
|
const DEFAULT_AUTHZ_CACHE_TTL_MS: u64 = 3_000;
|
||||||
|
const AUTH_CONNECT_RETRY_ATTEMPTS: usize = 6;
|
||||||
|
const AUTH_CONNECT_INITIAL_BACKOFF: Duration = Duration::from_millis(500);
|
||||||
|
const AUTH_CONNECT_MAX_BACKOFF: Duration = Duration::from_secs(5);
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct CacheEntry<T> {
|
struct CacheEntry<T> {
|
||||||
|
|
@ -64,9 +67,7 @@ impl AuthService {
|
||||||
config = config.without_tls();
|
config = config.without_tls();
|
||||||
}
|
}
|
||||||
|
|
||||||
let iam_client = IamClient::connect(config)
|
let iam_client = connect_iam_with_retry(config).await?;
|
||||||
.await
|
|
||||||
.map_err(|e| format!("Failed to connect to IAM server: {}", e))?;
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
iam_client: Arc::new(iam_client),
|
iam_client: Arc::new(iam_client),
|
||||||
|
|
@ -273,6 +274,59 @@ impl AuthService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn connect_iam_with_retry(config: IamClientConfig) -> Result<IamClient, String> {
|
||||||
|
let mut last_error = None;
|
||||||
|
for attempt in 0..AUTH_CONNECT_RETRY_ATTEMPTS {
|
||||||
|
match IamClient::connect(config.clone()).await {
|
||||||
|
Ok(client) => return Ok(client),
|
||||||
|
Err(err)
|
||||||
|
if attempt + 1 < AUTH_CONNECT_RETRY_ATTEMPTS
|
||||||
|
&& retryable_connect_error(&err.to_string()) =>
|
||||||
|
{
|
||||||
|
let delay = auth_connect_retry_delay(attempt);
|
||||||
|
warn!(
|
||||||
|
attempt = attempt + 1,
|
||||||
|
retry_after_ms = delay.as_millis() as u64,
|
||||||
|
error = %err,
|
||||||
|
"retrying IAM auth service bootstrap connection"
|
||||||
|
);
|
||||||
|
last_error = Some(err.to_string());
|
||||||
|
tokio::time::sleep(delay).await;
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
return Err(format!("Failed to connect to IAM server: {}", err));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!(
|
||||||
|
"Failed to connect to IAM server: {}",
|
||||||
|
last_error.unwrap_or_else(|| "unknown connection error".to_string())
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn auth_connect_retry_delay(attempt: usize) -> Duration {
|
||||||
|
AUTH_CONNECT_INITIAL_BACKOFF
|
||||||
|
.saturating_mul(1u32 << attempt.min(4))
|
||||||
|
.min(AUTH_CONNECT_MAX_BACKOFF)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn retryable_connect_error(message: &str) -> bool {
|
||||||
|
let lower = message.to_ascii_lowercase();
|
||||||
|
[
|
||||||
|
"transport error",
|
||||||
|
"connection refused",
|
||||||
|
"connection was not ready",
|
||||||
|
"operation timed out",
|
||||||
|
"deadline has elapsed",
|
||||||
|
"dns error",
|
||||||
|
"broken pipe",
|
||||||
|
"connection reset",
|
||||||
|
]
|
||||||
|
.iter()
|
||||||
|
.any(|needle| lower.contains(needle))
|
||||||
|
}
|
||||||
|
|
||||||
fn prune_expired<T>(cache: &mut HashMap<String, CacheEntry<T>>) {
|
fn prune_expired<T>(cache: &mut HashMap<String, CacheEntry<T>>) {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
cache.retain(|_, entry| entry.expires_at > now);
|
cache.retain(|_, entry| entry.expires_at > now);
|
||||||
|
|
@ -400,6 +454,29 @@ fn extract_token_from_metadata(metadata: &MetadataMap) -> Result<String, Status>
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn retryable_connect_error_matches_transport_failures() {
|
||||||
|
assert!(retryable_connect_error("Internal error: transport error"));
|
||||||
|
assert!(retryable_connect_error("connection was not ready"));
|
||||||
|
assert!(retryable_connect_error("deadline has elapsed"));
|
||||||
|
assert!(!retryable_connect_error("permission denied"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn auth_connect_retry_delay_is_capped() {
|
||||||
|
assert_eq!(auth_connect_retry_delay(0), Duration::from_millis(500));
|
||||||
|
assert_eq!(auth_connect_retry_delay(1), Duration::from_millis(1000));
|
||||||
|
assert_eq!(auth_connect_retry_delay(2), Duration::from_millis(2000));
|
||||||
|
assert_eq!(auth_connect_retry_delay(3), Duration::from_millis(4000));
|
||||||
|
assert_eq!(auth_connect_retry_delay(4), Duration::from_secs(5));
|
||||||
|
assert_eq!(auth_connect_retry_delay(8), Duration::from_secs(5));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_token_from_headers(headers: &HeaderMap) -> Result<String, Status> {
|
fn extract_token_from_headers(headers: &HeaderMap) -> Result<String, Status> {
|
||||||
if let Some(auth_header) = headers.get(AUTHORIZATION) {
|
if let Some(auth_header) = headers.get(AUTHORIZATION) {
|
||||||
let auth_str = auth_header
|
let auth_str = auth_header
|
||||||
|
|
|
||||||
|
|
@ -1,24 +1,25 @@
|
||||||
//! Credential storage (access/secret key metadata)
|
//! Credential storage (access/secret key metadata)
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use iam_types::{CredentialRecord, Result};
|
use iam_types::{CredentialRecord, Result};
|
||||||
|
|
||||||
use crate::backend::JsonStore;
|
use crate::backend::{Backend, CasResult, JsonStore, StorageBackend};
|
||||||
use crate::{DynMetadataClient, MetadataClient};
|
|
||||||
|
|
||||||
/// Store for credentials (S3/API keys)
|
/// Store for credentials (S3/API keys)
|
||||||
pub struct CredentialStore {
|
pub struct CredentialStore {
|
||||||
client: DynMetadataClient,
|
backend: Arc<Backend>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl JsonStore for CredentialStore {
|
impl JsonStore for CredentialStore {
|
||||||
fn client(&self) -> &dyn MetadataClient {
|
fn backend(&self) -> &Backend {
|
||||||
self.client.as_ref()
|
&self.backend
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CredentialStore {
|
impl CredentialStore {
|
||||||
pub fn new(client: DynMetadataClient) -> Self {
|
pub fn new(backend: Arc<Backend>) -> Self {
|
||||||
Self { client }
|
Self { backend }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn put(&self, record: &CredentialRecord) -> Result<u64> {
|
pub async fn put(&self, record: &CredentialRecord) -> Result<u64> {
|
||||||
|
|
@ -36,13 +37,17 @@ impl CredentialStore {
|
||||||
principal_id: &str,
|
principal_id: &str,
|
||||||
limit: u32,
|
limit: u32,
|
||||||
) -> Result<Vec<CredentialRecord>> {
|
) -> Result<Vec<CredentialRecord>> {
|
||||||
// scan prefix and filter by principal_id; small cardinality expected
|
|
||||||
let prefix = b"iam/credentials/";
|
let prefix = b"iam/credentials/";
|
||||||
let items = self.scan_prefix_json::<CredentialRecord>(prefix, limit).await?;
|
let items = self.backend.scan_prefix(prefix, limit).await?;
|
||||||
Ok(items
|
let mut credentials = Vec::new();
|
||||||
.into_iter()
|
for pair in items {
|
||||||
.filter(|rec| rec.principal_id == principal_id)
|
let record: CredentialRecord = serde_json::from_slice(&pair.value)
|
||||||
.collect())
|
.map_err(|e| iam_types::Error::Serialization(e.to_string()))?;
|
||||||
|
if record.principal_id == principal_id {
|
||||||
|
credentials.push(record);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(credentials)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn revoke(&self, access_key_id: &str) -> Result<bool> {
|
pub async fn revoke(&self, access_key_id: &str) -> Result<bool> {
|
||||||
|
|
@ -56,13 +61,10 @@ impl CredentialStore {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
record.revoked = true;
|
record.revoked = true;
|
||||||
match self
|
match self.cas_json(key.as_bytes(), version, &record).await? {
|
||||||
.cas_json(key.as_bytes(), version, &record)
|
CasResult::Success(_) => Ok(true),
|
||||||
.await?
|
CasResult::Conflict { .. } => Ok(false),
|
||||||
{
|
CasResult::NotFound => Ok(false),
|
||||||
crate::CasResult::Success(_) => Ok(true),
|
|
||||||
crate::CasResult::Conflict { .. } => Ok(false),
|
|
||||||
crate::CasResult::NotFound => Ok(false),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
pub mod backend;
|
pub mod backend;
|
||||||
pub mod binding_store;
|
pub mod binding_store;
|
||||||
|
pub mod credential_store;
|
||||||
pub mod group_store;
|
pub mod group_store;
|
||||||
pub mod principal_store;
|
pub mod principal_store;
|
||||||
pub mod role_store;
|
pub mod role_store;
|
||||||
|
|
@ -14,6 +15,7 @@ pub mod token_store;
|
||||||
|
|
||||||
pub use backend::{Backend, BackendConfig, CasResult, KvPair, StorageBackend};
|
pub use backend::{Backend, BackendConfig, CasResult, KvPair, StorageBackend};
|
||||||
pub use binding_store::BindingStore;
|
pub use binding_store::BindingStore;
|
||||||
|
pub use credential_store::CredentialStore;
|
||||||
pub use group_store::GroupStore;
|
pub use group_store::GroupStore;
|
||||||
pub use principal_store::PrincipalStore;
|
pub use principal_store::PrincipalStore;
|
||||||
pub use role_store::RoleStore;
|
pub use role_store::RoleStore;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::PrincipalKind;
|
||||||
|
|
||||||
/// Argon2 parameters used to hash the secret key
|
/// Argon2 parameters used to hash the secret key
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
pub struct Argon2Params {
|
pub struct Argon2Params {
|
||||||
|
|
@ -17,6 +19,9 @@ pub struct Argon2Params {
|
||||||
pub struct CredentialRecord {
|
pub struct CredentialRecord {
|
||||||
pub access_key_id: String,
|
pub access_key_id: String,
|
||||||
pub principal_id: String,
|
pub principal_id: String,
|
||||||
|
pub principal_kind: PrincipalKind,
|
||||||
|
pub org_id: Option<String>,
|
||||||
|
pub project_id: Option<String>,
|
||||||
pub created_at: u64,
|
pub created_at: u64,
|
||||||
pub expires_at: Option<u64>,
|
pub expires_at: Option<u64>,
|
||||||
pub revoked: bool,
|
pub revoked: bool,
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@
|
||||||
//! - Error types
|
//! - Error types
|
||||||
|
|
||||||
pub mod condition;
|
pub mod condition;
|
||||||
|
pub mod credential;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod policy;
|
pub mod policy;
|
||||||
pub mod principal;
|
pub mod principal;
|
||||||
|
|
@ -19,6 +20,7 @@ pub mod scope;
|
||||||
pub mod token;
|
pub mod token;
|
||||||
|
|
||||||
pub use condition::{Condition, ConditionExpr};
|
pub use condition::{Condition, ConditionExpr};
|
||||||
|
pub use credential::{Argon2Params, CredentialRecord};
|
||||||
pub use error::{Error, IamError, Result, StorageError};
|
pub use error::{Error, IamError, Result, StorageError};
|
||||||
pub use policy::{CreateBindingRequest, EffectivePolicy, PolicyBinding};
|
pub use policy::{CreateBindingRequest, EffectivePolicy, PolicyBinding};
|
||||||
pub use principal::{Principal, PrincipalKind, PrincipalRef};
|
pub use principal::{Principal, PrincipalKind, PrincipalRef};
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,14 @@ service IamToken {
|
||||||
rpc RefreshToken(RefreshTokenRequest) returns (RefreshTokenResponse);
|
rpc RefreshToken(RefreshTokenRequest) returns (RefreshTokenResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IamCredential manages S3-style access/secret key credentials.
|
||||||
|
service IamCredential {
|
||||||
|
rpc CreateS3Credential(CreateS3CredentialRequest) returns (CreateS3CredentialResponse);
|
||||||
|
rpc GetSecretKey(GetSecretKeyRequest) returns (GetSecretKeyResponse);
|
||||||
|
rpc ListCredentials(ListCredentialsRequest) returns (ListCredentialsResponse);
|
||||||
|
rpc RevokeCredential(RevokeCredentialRequest) returns (RevokeCredentialResponse);
|
||||||
|
}
|
||||||
|
|
||||||
message IssueTokenRequest {
|
message IssueTokenRequest {
|
||||||
// Principal to issue token for
|
// Principal to issue token for
|
||||||
string principal_id = 1;
|
string principal_id = 1;
|
||||||
|
|
@ -162,6 +170,63 @@ message RefreshTokenResponse {
|
||||||
uint64 expires_at = 2;
|
uint64 expires_at = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message CreateS3CredentialRequest {
|
||||||
|
string principal_id = 1;
|
||||||
|
string description = 2;
|
||||||
|
optional uint64 expires_at = 3;
|
||||||
|
optional string org_id = 4;
|
||||||
|
optional string project_id = 5;
|
||||||
|
PrincipalKind principal_kind = 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
message CreateS3CredentialResponse {
|
||||||
|
string access_key_id = 1;
|
||||||
|
string secret_key = 2;
|
||||||
|
uint64 created_at = 3;
|
||||||
|
optional uint64 expires_at = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message GetSecretKeyRequest {
|
||||||
|
string access_key_id = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message GetSecretKeyResponse {
|
||||||
|
string secret_key = 1;
|
||||||
|
string principal_id = 2;
|
||||||
|
optional uint64 expires_at = 3;
|
||||||
|
optional string org_id = 4;
|
||||||
|
optional string project_id = 5;
|
||||||
|
PrincipalKind principal_kind = 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
message ListCredentialsRequest {
|
||||||
|
string principal_id = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Credential {
|
||||||
|
string access_key_id = 1;
|
||||||
|
string principal_id = 2;
|
||||||
|
uint64 created_at = 3;
|
||||||
|
optional uint64 expires_at = 4;
|
||||||
|
bool revoked = 5;
|
||||||
|
string description = 6;
|
||||||
|
optional string org_id = 7;
|
||||||
|
optional string project_id = 8;
|
||||||
|
PrincipalKind principal_kind = 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
message ListCredentialsResponse {
|
||||||
|
repeated Credential credentials = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RevokeCredentialRequest {
|
||||||
|
string access_key_id = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RevokeCredentialResponse {
|
||||||
|
bool success = 1;
|
||||||
|
}
|
||||||
|
|
||||||
message InternalTokenClaims {
|
message InternalTokenClaims {
|
||||||
string principal_id = 1;
|
string principal_id = 1;
|
||||||
PrincipalKind principal_kind = 2;
|
PrincipalKind principal_kind = 2;
|
||||||
|
|
|
||||||
796
k8shost/Cargo.lock
generated
796
k8shost/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
588
lightningstor/Cargo.lock
generated
588
lightningstor/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -10,6 +10,8 @@ use crate::node::{NodeClientTrait, NodeRegistry};
|
||||||
use crate::placement::{ConsistentHashSelector, NodeSelector};
|
use crate::placement::{ConsistentHashSelector, NodeSelector};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
|
use futures::future::BoxFuture;
|
||||||
|
use futures::stream::{FuturesUnordered, StreamExt};
|
||||||
use lightningstor_storage::{StorageBackend, StorageError, StorageResult};
|
use lightningstor_storage::{StorageBackend, StorageError, StorageResult};
|
||||||
use lightningstor_types::ObjectId;
|
use lightningstor_types::ObjectId;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
@ -336,7 +338,7 @@ impl ErasureCodedBackend {
|
||||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||||
|
|
||||||
// Try to read all shards in parallel
|
// Try to read all shards in parallel
|
||||||
let mut shard_futures = Vec::with_capacity(self.total_shards());
|
let mut shard_futures = FuturesUnordered::new();
|
||||||
for shard_idx in 0..self.total_shards() {
|
for shard_idx in 0..self.total_shards() {
|
||||||
let is_parity = shard_idx >= self.data_shards;
|
let is_parity = shard_idx >= self.data_shards;
|
||||||
let chunk_id = ChunkId::new(object_id, chunk_index, shard_idx, is_parity);
|
let chunk_id = ChunkId::new(object_id, chunk_index, shard_idx, is_parity);
|
||||||
|
|
@ -345,35 +347,73 @@ impl ErasureCodedBackend {
|
||||||
let chunk_key = chunk_id.to_key();
|
let chunk_key = chunk_id.to_key();
|
||||||
|
|
||||||
shard_futures.push(async move {
|
shard_futures.push(async move {
|
||||||
// Try to read from the preferred node first
|
let preferred_id = node_selector
|
||||||
if let Ok(node) = node_selector.select_for_read(&nodes, &chunk_key).await {
|
.select_for_read(&nodes, &chunk_key)
|
||||||
if let Ok(data) = node
|
.await
|
||||||
.get_chunk(&chunk_key, shard_idx as u32, is_parity)
|
.ok()
|
||||||
.await
|
.map(|node| node.node_id().to_string());
|
||||||
|
let mut readers: FuturesUnordered<BoxFuture<'static, Option<Bytes>>> =
|
||||||
|
FuturesUnordered::new();
|
||||||
|
|
||||||
|
if let Some(preferred_id) = preferred_id.as_ref() {
|
||||||
|
if let Some(preferred) = nodes
|
||||||
|
.iter()
|
||||||
|
.find(|node| node.node_id() == preferred_id.as_str())
|
||||||
|
.cloned()
|
||||||
{
|
{
|
||||||
return Some(data);
|
let key = chunk_key.clone();
|
||||||
|
readers.push(Box::pin(async move {
|
||||||
|
preferred
|
||||||
|
.get_chunk(&key, shard_idx as u32, is_parity)
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try other nodes if preferred fails
|
|
||||||
for node in &nodes {
|
for node in &nodes {
|
||||||
if let Ok(data) = node
|
if preferred_id
|
||||||
.get_chunk(&chunk_key, shard_idx as u32, is_parity)
|
.as_ref()
|
||||||
.await
|
.is_some_and(|preferred| preferred == node.node_id())
|
||||||
{
|
{
|
||||||
return Some(data);
|
continue;
|
||||||
|
}
|
||||||
|
let node = node.clone();
|
||||||
|
let key = chunk_key.clone();
|
||||||
|
readers.push(Box::pin(async move {
|
||||||
|
node.get_chunk(&key, shard_idx as u32, is_parity).await.ok()
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(result) = readers.next().await {
|
||||||
|
if let Some(data) = result {
|
||||||
|
return (shard_idx, Some(data));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
None
|
(shard_idx, None)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let shard_results: Vec<Option<Bytes>> = futures::future::join_all(shard_futures).await;
|
let mut shard_results = vec![None; self.total_shards()];
|
||||||
|
let mut available_count = 0usize;
|
||||||
|
|
||||||
|
while let Some((shard_idx, shard)) = shard_futures.next().await {
|
||||||
|
if shard.is_some() {
|
||||||
|
available_count += 1;
|
||||||
|
}
|
||||||
|
shard_results[shard_idx] = shard;
|
||||||
|
|
||||||
|
if available_count >= self.data_shards {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if available_count + shard_futures.len() < self.data_shards {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Count available shards
|
// Count available shards
|
||||||
let available_count = shard_results.iter().filter(|s| s.is_some()).count();
|
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
object_id = %object_id,
|
object_id = %object_id,
|
||||||
chunk_index,
|
chunk_index,
|
||||||
|
|
@ -419,9 +459,9 @@ impl StorageBackend for ErasureCodedBackend {
|
||||||
debug!(object_id = %object_id, size = original_size, "Putting object with erasure coding");
|
debug!(object_id = %object_id, size = original_size, "Putting object with erasure coding");
|
||||||
|
|
||||||
// Split data into chunks
|
// Split data into chunks
|
||||||
let chunks = self.chunk_manager.split(&data);
|
let chunk_size = self.chunk_manager.effective_chunk_size(data.len());
|
||||||
|
let chunks = self.chunk_manager.split_with_chunk_size(&data, chunk_size);
|
||||||
let chunk_count = chunks.len();
|
let chunk_count = chunks.len();
|
||||||
let chunk_size = self.chunk_manager.chunk_size();
|
|
||||||
|
|
||||||
// Write each chunk
|
// Write each chunk
|
||||||
for (chunk_idx, chunk_data) in chunks.into_iter().enumerate() {
|
for (chunk_idx, chunk_data) in chunks.into_iter().enumerate() {
|
||||||
|
|
@ -591,24 +631,78 @@ impl StorageBackend for ErasureCodedBackend {
|
||||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||||
|
|
||||||
// Try to read shards
|
// Try to read shards
|
||||||
let mut shard_futures = Vec::with_capacity(self.total_shards());
|
let mut shard_futures = FuturesUnordered::new();
|
||||||
for shard_idx in 0..self.total_shards() {
|
for shard_idx in 0..self.total_shards() {
|
||||||
let is_parity = shard_idx >= self.data_shards;
|
let is_parity = shard_idx >= self.data_shards;
|
||||||
let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" });
|
let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" });
|
||||||
let nodes = nodes.clone();
|
let nodes = nodes.clone();
|
||||||
|
let node_selector = self.node_selector.clone();
|
||||||
|
|
||||||
shard_futures.push(async move {
|
shard_futures.push(async move {
|
||||||
for node in &nodes {
|
let preferred_id = node_selector
|
||||||
if let Ok(data) = node.get_chunk(&key, shard_idx as u32, is_parity).await {
|
.select_for_read(&nodes, &key)
|
||||||
return Some(data);
|
.await
|
||||||
|
.ok()
|
||||||
|
.map(|node| node.node_id().to_string());
|
||||||
|
let mut readers: FuturesUnordered<BoxFuture<'static, Option<Bytes>>> =
|
||||||
|
FuturesUnordered::new();
|
||||||
|
|
||||||
|
if let Some(preferred_id) = preferred_id.as_ref() {
|
||||||
|
if let Some(preferred) = nodes
|
||||||
|
.iter()
|
||||||
|
.find(|node| node.node_id() == preferred_id.as_str())
|
||||||
|
.cloned()
|
||||||
|
{
|
||||||
|
let key = key.clone();
|
||||||
|
readers.push(Box::pin(async move {
|
||||||
|
preferred
|
||||||
|
.get_chunk(&key, shard_idx as u32, is_parity)
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
|
||||||
|
for node in &nodes {
|
||||||
|
if preferred_id
|
||||||
|
.as_ref()
|
||||||
|
.is_some_and(|preferred| preferred == node.node_id())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let node = node.clone();
|
||||||
|
let key = key.clone();
|
||||||
|
readers.push(Box::pin(async move {
|
||||||
|
node.get_chunk(&key, shard_idx as u32, is_parity).await.ok()
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(result) = readers.next().await {
|
||||||
|
if let Some(data) = result {
|
||||||
|
return (shard_idx, Some(data));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(shard_idx, None)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let shard_results: Vec<Option<Bytes>> = futures::future::join_all(shard_futures).await;
|
let mut shard_results = vec![None; self.total_shards()];
|
||||||
let available = shard_results.iter().filter(|s| s.is_some()).count();
|
let mut available = 0usize;
|
||||||
|
|
||||||
|
while let Some((shard_idx, shard)) = shard_futures.next().await {
|
||||||
|
if shard.is_some() {
|
||||||
|
available += 1;
|
||||||
|
}
|
||||||
|
shard_results[shard_idx] = shard;
|
||||||
|
|
||||||
|
if available >= self.data_shards {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if available + shard_futures.len() < self.data_shards {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if available < self.data_shards {
|
if available < self.data_shards {
|
||||||
return Err(StorageError::Backend(format!(
|
return Err(StorageError::Backend(format!(
|
||||||
|
|
@ -674,7 +768,135 @@ impl StorageBackend for ErasureCodedBackend {
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::config::{ChunkConfig, RedundancyMode};
|
use crate::config::{ChunkConfig, RedundancyMode};
|
||||||
use crate::node::MockNodeRegistry;
|
use crate::node::{MockNodeClient, MockNodeRegistry, NodeError, NodeResult};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use dashmap::DashMap;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use tokio::time::sleep;
|
||||||
|
|
||||||
|
struct SlowReadNodeClient {
|
||||||
|
node_id: String,
|
||||||
|
endpoint: String,
|
||||||
|
delay: Duration,
|
||||||
|
chunks: DashMap<String, Vec<u8>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SlowReadNodeClient {
|
||||||
|
fn new(node_id: impl Into<String>, endpoint: impl Into<String>, delay: Duration) -> Self {
|
||||||
|
Self {
|
||||||
|
node_id: node_id.into(),
|
||||||
|
endpoint: endpoint.into(),
|
||||||
|
delay,
|
||||||
|
chunks: DashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn insert_chunk(&self, chunk_id: impl Into<String>, data: Vec<u8>) {
|
||||||
|
self.chunks.insert(chunk_id.into(), data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl NodeClientTrait for SlowReadNodeClient {
|
||||||
|
fn node_id(&self) -> &str {
|
||||||
|
&self.node_id
|
||||||
|
}
|
||||||
|
|
||||||
|
fn endpoint(&self) -> &str {
|
||||||
|
&self.endpoint
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn is_healthy(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn put_chunk(
|
||||||
|
&self,
|
||||||
|
chunk_id: &str,
|
||||||
|
_shard_index: u32,
|
||||||
|
_is_parity: bool,
|
||||||
|
data: Bytes,
|
||||||
|
) -> NodeResult<()> {
|
||||||
|
self.chunks.insert(chunk_id.to_string(), data.to_vec());
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_chunk(
|
||||||
|
&self,
|
||||||
|
chunk_id: &str,
|
||||||
|
_shard_index: u32,
|
||||||
|
_is_parity: bool,
|
||||||
|
) -> NodeResult<Bytes> {
|
||||||
|
sleep(self.delay).await;
|
||||||
|
self.chunks
|
||||||
|
.get(chunk_id)
|
||||||
|
.map(|value| Bytes::from(value.value().clone()))
|
||||||
|
.ok_or_else(|| NodeError::NotFound(chunk_id.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_chunk(&self, chunk_id: &str) -> NodeResult<()> {
|
||||||
|
self.chunks.remove(chunk_id);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn chunk_exists(&self, chunk_id: &str) -> NodeResult<bool> {
|
||||||
|
Ok(self.chunks.contains_key(chunk_id))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn chunk_size(&self, chunk_id: &str) -> NodeResult<Option<u64>> {
|
||||||
|
Ok(self
|
||||||
|
.chunks
|
||||||
|
.get(chunk_id)
|
||||||
|
.map(|value| value.value().len() as u64))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ping(&self) -> NodeResult<Duration> {
|
||||||
|
Ok(Duration::from_millis(1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct FixedNodeRegistry {
|
||||||
|
nodes: Vec<Arc<dyn NodeClientTrait>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl NodeRegistry for FixedNodeRegistry {
|
||||||
|
async fn get_all_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
|
||||||
|
Ok(self.nodes.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_healthy_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
|
||||||
|
Ok(self.nodes.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn register_node(&self, _info: crate::node::NodeInfo) -> NodeResult<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn deregister_node(&self, _node_id: &str) -> NodeResult<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_health(&self, _node_id: &str, _healthy: bool) -> NodeResult<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_node(&self, node_id: &str) -> NodeResult<Option<Arc<dyn NodeClientTrait>>> {
|
||||||
|
Ok(self
|
||||||
|
.nodes
|
||||||
|
.iter()
|
||||||
|
.find(|node| node.node_id() == node_id)
|
||||||
|
.cloned())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn node_count(&self) -> usize {
|
||||||
|
self.nodes.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn healthy_node_count(&self) -> usize {
|
||||||
|
self.nodes.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn create_ec_config(data_shards: usize, parity_shards: usize) -> DistributedConfig {
|
fn create_ec_config(data_shards: usize, parity_shards: usize) -> DistributedConfig {
|
||||||
DistributedConfig {
|
DistributedConfig {
|
||||||
|
|
@ -858,4 +1080,162 @@ mod tests {
|
||||||
assert_eq!(retrieved.len(), data.len());
|
assert_eq!(retrieved.len(), data.len());
|
||||||
assert_eq!(retrieved, data);
|
assert_eq!(retrieved, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_ec_backend_read_returns_after_minimum_shards() {
|
||||||
|
let config = create_ec_config(4, 2);
|
||||||
|
let mut fast_nodes = Vec::new();
|
||||||
|
for index in 0..4 {
|
||||||
|
fast_nodes.push(Arc::new(MockNodeClient::new(
|
||||||
|
format!("fast-{index}"),
|
||||||
|
format!("http://fast-{index}:9002"),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
let slow_a = Arc::new(SlowReadNodeClient::new(
|
||||||
|
"slow-a",
|
||||||
|
"http://slow-a:9002",
|
||||||
|
Duration::from_millis(250),
|
||||||
|
));
|
||||||
|
let slow_b = Arc::new(SlowReadNodeClient::new(
|
||||||
|
"slow-b",
|
||||||
|
"http://slow-b:9002",
|
||||||
|
Duration::from_millis(250),
|
||||||
|
));
|
||||||
|
|
||||||
|
let backend = ErasureCodedBackend::new(
|
||||||
|
config,
|
||||||
|
Arc::new(FixedNodeRegistry {
|
||||||
|
nodes: vec![
|
||||||
|
fast_nodes[0].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
fast_nodes[1].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
fast_nodes[2].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
fast_nodes[3].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
slow_a.clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
slow_b.clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let object_id = ObjectId::new();
|
||||||
|
let data = Bytes::from(vec![5u8; 512]);
|
||||||
|
let metadata = ObjectMetadata::new(data.len() as u64, 1, data.len());
|
||||||
|
let meta_key = ObjectMetadata::metadata_key(&object_id);
|
||||||
|
let shards = backend.codec.encode(&data).unwrap();
|
||||||
|
|
||||||
|
for fast_node in &fast_nodes {
|
||||||
|
fast_node
|
||||||
|
.put_chunk(&meta_key, 0, false, Bytes::from(metadata.to_bytes()))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
for slow_node in [&slow_a, &slow_b] {
|
||||||
|
slow_node.insert_chunk(meta_key.clone(), metadata.to_bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (shard_idx, shard_data) in shards.into_iter().enumerate() {
|
||||||
|
let is_parity = shard_idx >= backend.data_shards;
|
||||||
|
let key = ChunkId::new(&object_id, 0, shard_idx, is_parity).to_key();
|
||||||
|
if shard_idx < 4 {
|
||||||
|
fast_nodes[shard_idx]
|
||||||
|
.put_chunk(
|
||||||
|
&key,
|
||||||
|
shard_idx as u32,
|
||||||
|
is_parity,
|
||||||
|
Bytes::from(shard_data),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
} else if shard_idx == 4 {
|
||||||
|
slow_a.insert_chunk(key, shard_data);
|
||||||
|
} else {
|
||||||
|
slow_b.insert_chunk(key, shard_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let started = Instant::now();
|
||||||
|
let retrieved = backend.get_object(&object_id).await.unwrap();
|
||||||
|
let elapsed = started.elapsed();
|
||||||
|
|
||||||
|
assert!(elapsed < Duration::from_millis(200), "elapsed={elapsed:?}");
|
||||||
|
assert_eq!(retrieved, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_ec_backend_get_part_returns_after_minimum_shards() {
|
||||||
|
let config = create_ec_config(4, 2);
|
||||||
|
let mut fast_nodes = Vec::new();
|
||||||
|
for index in 0..4 {
|
||||||
|
fast_nodes.push(Arc::new(MockNodeClient::new(
|
||||||
|
format!("fast-{index}"),
|
||||||
|
format!("http://fast-{index}:9002"),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
let slow_a = Arc::new(SlowReadNodeClient::new(
|
||||||
|
"slow-a",
|
||||||
|
"http://slow-a:9002",
|
||||||
|
Duration::from_millis(250),
|
||||||
|
));
|
||||||
|
let slow_b = Arc::new(SlowReadNodeClient::new(
|
||||||
|
"slow-b",
|
||||||
|
"http://slow-b:9002",
|
||||||
|
Duration::from_millis(250),
|
||||||
|
));
|
||||||
|
|
||||||
|
let backend = ErasureCodedBackend::new(
|
||||||
|
config,
|
||||||
|
Arc::new(FixedNodeRegistry {
|
||||||
|
nodes: vec![
|
||||||
|
fast_nodes[0].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
fast_nodes[1].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
fast_nodes[2].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
fast_nodes[3].clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
slow_a.clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
slow_b.clone() as Arc<dyn NodeClientTrait>,
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let upload_id = "upload-latency";
|
||||||
|
let part_number = 7;
|
||||||
|
let data = Bytes::from(vec![9u8; 512]);
|
||||||
|
let shards = backend.codec.encode(&data).unwrap();
|
||||||
|
|
||||||
|
for (shard_idx, shard_data) in shards.into_iter().enumerate() {
|
||||||
|
let is_parity = shard_idx >= backend.data_shards;
|
||||||
|
let key = format!(
|
||||||
|
"part_{}_{}_{}_{}",
|
||||||
|
upload_id,
|
||||||
|
part_number,
|
||||||
|
shard_idx,
|
||||||
|
if is_parity { "p" } else { "d" }
|
||||||
|
);
|
||||||
|
|
||||||
|
if shard_idx < 4 {
|
||||||
|
fast_nodes[shard_idx]
|
||||||
|
.put_chunk(
|
||||||
|
&key,
|
||||||
|
shard_idx as u32,
|
||||||
|
is_parity,
|
||||||
|
Bytes::from(shard_data),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
} else if shard_idx == 4 {
|
||||||
|
slow_a.insert_chunk(key, shard_data);
|
||||||
|
} else {
|
||||||
|
slow_b.insert_chunk(key, shard_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let started = Instant::now();
|
||||||
|
let retrieved = backend.get_part(upload_id, part_number).await.unwrap();
|
||||||
|
let elapsed = started.elapsed();
|
||||||
|
|
||||||
|
assert!(elapsed < Duration::from_millis(200), "elapsed={elapsed:?}");
|
||||||
|
assert_eq!(retrieved, data);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,13 +5,15 @@
|
||||||
|
|
||||||
use crate::chunk::ChunkManager;
|
use crate::chunk::ChunkManager;
|
||||||
use crate::config::DistributedConfig;
|
use crate::config::DistributedConfig;
|
||||||
use crate::node::{NodeClientTrait, NodeError, NodeRegistry};
|
use crate::node::{NodeClientTrait, NodeError, NodeRegistry, NodeResult};
|
||||||
use crate::placement::{ConsistentHashSelector, NodeSelector};
|
use crate::placement::{ConsistentHashSelector, NodeSelector};
|
||||||
|
use crate::repair::{RepairQueue, ReplicatedRepairTask};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use bytes::{Bytes, BytesMut};
|
use bytes::{Bytes, BytesMut};
|
||||||
use futures::stream::{FuturesUnordered, StreamExt};
|
use futures::stream::{FuturesUnordered, StreamExt};
|
||||||
use lightningstor_storage::{StorageBackend, StorageError, StorageResult};
|
use lightningstor_storage::{StorageBackend, StorageError, StorageResult};
|
||||||
use lightningstor_types::ObjectId;
|
use lightningstor_types::ObjectId;
|
||||||
|
use std::net::IpAddr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tracing::{debug, error, warn};
|
use tracing::{debug, error, warn};
|
||||||
|
|
@ -81,6 +83,8 @@ pub struct ReplicatedBackend {
|
||||||
read_quorum: usize,
|
read_quorum: usize,
|
||||||
/// Write quorum (minimum replicas for successful write)
|
/// Write quorum (minimum replicas for successful write)
|
||||||
write_quorum: usize,
|
write_quorum: usize,
|
||||||
|
/// Durable queue for repairing under-replicated chunks.
|
||||||
|
repair_queue: Option<Arc<dyn RepairQueue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ReplicatedBackend {
|
impl ReplicatedBackend {
|
||||||
|
|
@ -92,6 +96,15 @@ impl ReplicatedBackend {
|
||||||
pub async fn new(
|
pub async fn new(
|
||||||
config: DistributedConfig,
|
config: DistributedConfig,
|
||||||
node_registry: Arc<dyn NodeRegistry>,
|
node_registry: Arc<dyn NodeRegistry>,
|
||||||
|
) -> StorageResult<Self> {
|
||||||
|
Self::new_with_repair_queue(config, node_registry, None).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a replicated backend with an optional durable repair queue.
|
||||||
|
pub async fn new_with_repair_queue(
|
||||||
|
config: DistributedConfig,
|
||||||
|
node_registry: Arc<dyn NodeRegistry>,
|
||||||
|
repair_queue: Option<Arc<dyn RepairQueue>>,
|
||||||
) -> StorageResult<Self> {
|
) -> StorageResult<Self> {
|
||||||
let (replica_count, read_quorum, write_quorum) = match &config.redundancy {
|
let (replica_count, read_quorum, write_quorum) = match &config.redundancy {
|
||||||
crate::config::RedundancyMode::Replicated {
|
crate::config::RedundancyMode::Replicated {
|
||||||
|
|
@ -116,6 +129,7 @@ impl ReplicatedBackend {
|
||||||
replica_count,
|
replica_count,
|
||||||
read_quorum,
|
read_quorum,
|
||||||
write_quorum,
|
write_quorum,
|
||||||
|
repair_queue,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -134,6 +148,89 @@ impl ReplicatedBackend {
|
||||||
self.write_quorum
|
self.write_quorum
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn finalize_pending_replica_writes(
|
||||||
|
repair_queue: Option<Arc<dyn RepairQueue>>,
|
||||||
|
mut pending_writes: FuturesUnordered<tokio::task::JoinHandle<(String, NodeResult<()>)>>,
|
||||||
|
key: String,
|
||||||
|
shard_index: u32,
|
||||||
|
mut success_count: usize,
|
||||||
|
total_replicas: usize,
|
||||||
|
reason: String,
|
||||||
|
) {
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
|
||||||
|
while let Some(result) = pending_writes.next().await {
|
||||||
|
match result {
|
||||||
|
Ok((_, Ok(()))) => success_count += 1,
|
||||||
|
Ok((node_id, Err(err))) => errors.push(format!("{node_id}: {err}")),
|
||||||
|
Err(join_err) => errors.push(format!("join error: {join_err}")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if success_count >= total_replicas {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(queue) = repair_queue {
|
||||||
|
queue
|
||||||
|
.enqueue_repair(ReplicatedRepairTask::new(key.clone(), shard_index, reason))
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
warn!(
|
||||||
|
chunk_key = %key,
|
||||||
|
shard_index,
|
||||||
|
success_count,
|
||||||
|
total_replicas,
|
||||||
|
errors = ?errors,
|
||||||
|
"Replica write completed below desired replication; repair task queued"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn finalize_pending_chunked_write_repairs(
|
||||||
|
repair_queue: Option<Arc<dyn RepairQueue>>,
|
||||||
|
mut pending_writes: FuturesUnordered<tokio::task::JoinHandle<(String, NodeResult<()>)>>,
|
||||||
|
repair_targets: Vec<(String, u32)>,
|
||||||
|
object_id: String,
|
||||||
|
mut success_count: usize,
|
||||||
|
total_replicas: usize,
|
||||||
|
reason: String,
|
||||||
|
) {
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
|
||||||
|
while let Some(result) = pending_writes.next().await {
|
||||||
|
match result {
|
||||||
|
Ok((_, Ok(()))) => success_count += 1,
|
||||||
|
Ok((node_id, Err(err))) => errors.push(format!("{node_id}: {err}")),
|
||||||
|
Err(join_err) => errors.push(format!("join error: {join_err}")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if success_count >= total_replicas {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(queue) = repair_queue {
|
||||||
|
for (chunk_key, shard_index) in repair_targets {
|
||||||
|
queue
|
||||||
|
.enqueue_repair(ReplicatedRepairTask::new(
|
||||||
|
chunk_key,
|
||||||
|
shard_index,
|
||||||
|
reason.clone(),
|
||||||
|
))
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
warn!(
|
||||||
|
object_id = %object_id,
|
||||||
|
success_count,
|
||||||
|
total_replicas,
|
||||||
|
errors = ?errors,
|
||||||
|
"Chunked replica write completed below desired replication; repair tasks queued"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn chunk_write_parallelism(&self, chunk_count: usize) -> usize {
|
fn chunk_write_parallelism(&self, chunk_count: usize) -> usize {
|
||||||
chunk_count
|
chunk_count
|
||||||
.min(
|
.min(
|
||||||
|
|
@ -220,7 +317,13 @@ impl ReplicatedBackend {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Ok(preferred) = self.node_selector.select_for_read(nodes, key).await {
|
let mut ordered_nodes = Self::ordered_read_nodes(nodes, self
|
||||||
|
.node_selector
|
||||||
|
.select_for_read(nodes, key)
|
||||||
|
.await
|
||||||
|
.ok());
|
||||||
|
|
||||||
|
if let Some(preferred) = ordered_nodes.first() {
|
||||||
match preferred.get_chunk(key, shard_index, false).await {
|
match preferred.get_chunk(key, shard_index, false).await {
|
||||||
Ok(data) => return Ok(Some(data)),
|
Ok(data) => return Ok(Some(data)),
|
||||||
Err(NodeError::NotFound(_)) => {}
|
Err(NodeError::NotFound(_)) => {}
|
||||||
|
|
@ -235,7 +338,7 @@ impl ReplicatedBackend {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for node in nodes {
|
for node in ordered_nodes.drain(1..) {
|
||||||
match node.get_chunk(key, shard_index, false).await {
|
match node.get_chunk(key, shard_index, false).await {
|
||||||
Ok(data) => return Ok(Some(data)),
|
Ok(data) => return Ok(Some(data)),
|
||||||
Err(NodeError::NotFound(_)) => continue,
|
Err(NodeError::NotFound(_)) => continue,
|
||||||
|
|
@ -383,6 +486,21 @@ impl ReplicatedBackend {
|
||||||
Ok((_, Ok(()))) => {
|
Ok((_, Ok(()))) => {
|
||||||
success_count += 1;
|
success_count += 1;
|
||||||
if success_count >= self.write_quorum {
|
if success_count >= self.write_quorum {
|
||||||
|
if success_count < total_replicas {
|
||||||
|
let pending_writes =
|
||||||
|
std::mem::replace(&mut write_futures, FuturesUnordered::new());
|
||||||
|
tokio::spawn(Self::finalize_pending_replica_writes(
|
||||||
|
self.repair_queue.clone(),
|
||||||
|
pending_writes,
|
||||||
|
key.clone(),
|
||||||
|
shard_index,
|
||||||
|
success_count,
|
||||||
|
total_replicas,
|
||||||
|
format!(
|
||||||
|
"replica write completed below desired replication after quorum ({success_count}/{total_replicas})"
|
||||||
|
),
|
||||||
|
));
|
||||||
|
}
|
||||||
debug!(
|
debug!(
|
||||||
chunk_key = %key,
|
chunk_key = %key,
|
||||||
success_count,
|
success_count,
|
||||||
|
|
@ -427,13 +545,13 @@ impl ReplicatedBackend {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn write_chunked_object(&self, object_id: &ObjectId, data: Bytes) -> StorageResult<()> {
|
async fn write_chunked_object(&self, object_id: &ObjectId, data: Bytes) -> StorageResult<()> {
|
||||||
let chunk_size = self.chunk_manager.chunk_size();
|
let chunk_size = self.chunk_manager.effective_chunk_size(data.len());
|
||||||
let chunk_count = self.chunk_manager.chunk_count(data.len());
|
let chunk_count = ChunkManager::chunk_count_for_size(data.len(), chunk_size);
|
||||||
let metadata = ReplicatedObjectMetadata::new(data.len(), chunk_count, chunk_size);
|
let metadata = ReplicatedObjectMetadata::new(data.len(), chunk_count, chunk_size);
|
||||||
let mut requests = Vec::with_capacity(chunk_count + 1);
|
let mut requests = Vec::with_capacity(chunk_count + 1);
|
||||||
for chunk_index in 0..chunk_count {
|
for chunk_index in 0..chunk_count {
|
||||||
let chunk_key = Self::object_chunk_key(object_id, chunk_index);
|
let chunk_key = Self::object_chunk_key(object_id, chunk_index);
|
||||||
let (start, len) = self.chunk_manager.chunk_range(data.len(), chunk_index);
|
let (start, len) = ChunkManager::chunk_range_for_size(data.len(), chunk_index, chunk_size);
|
||||||
let chunk_bytes = data.slice(start..start + len);
|
let chunk_bytes = data.slice(start..start + len);
|
||||||
requests.push((chunk_key, chunk_index as u32, false, chunk_bytes));
|
requests.push((chunk_key, chunk_index as u32, false, chunk_bytes));
|
||||||
}
|
}
|
||||||
|
|
@ -464,6 +582,27 @@ impl ReplicatedBackend {
|
||||||
Ok((_, Ok(()))) => {
|
Ok((_, Ok(()))) => {
|
||||||
success_count += 1;
|
success_count += 1;
|
||||||
if success_count >= self.write_quorum {
|
if success_count >= self.write_quorum {
|
||||||
|
if success_count < total_replicas {
|
||||||
|
let repair_targets = requests
|
||||||
|
.iter()
|
||||||
|
.map(|(chunk_key, shard_index, _, _)| {
|
||||||
|
(chunk_key.clone(), *shard_index)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let pending_writes =
|
||||||
|
std::mem::replace(&mut write_futures, FuturesUnordered::new());
|
||||||
|
tokio::spawn(Self::finalize_pending_chunked_write_repairs(
|
||||||
|
self.repair_queue.clone(),
|
||||||
|
pending_writes,
|
||||||
|
repair_targets,
|
||||||
|
object_id.to_string(),
|
||||||
|
success_count,
|
||||||
|
total_replicas,
|
||||||
|
format!(
|
||||||
|
"chunked object write completed below desired replication after quorum ({success_count}/{total_replicas})"
|
||||||
|
),
|
||||||
|
));
|
||||||
|
}
|
||||||
debug!(
|
debug!(
|
||||||
object_id = %object_id,
|
object_id = %object_id,
|
||||||
chunk_count,
|
chunk_count,
|
||||||
|
|
@ -509,6 +648,150 @@ impl ReplicatedBackend {
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn repair_chunk(&self, task: &ReplicatedRepairTask) -> StorageResult<()> {
|
||||||
|
let healthy_nodes = self
|
||||||
|
.node_registry
|
||||||
|
.get_healthy_nodes()
|
||||||
|
.await
|
||||||
|
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||||
|
if healthy_nodes.is_empty() {
|
||||||
|
return Err(StorageError::Backend(
|
||||||
|
"No healthy storage nodes available for repair".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let desired_nodes = self
|
||||||
|
.node_selector
|
||||||
|
.select_nodes_for_key(&healthy_nodes, self.replica_count, &task.key)
|
||||||
|
.await
|
||||||
|
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||||
|
|
||||||
|
let mut present_nodes = Vec::new();
|
||||||
|
let mut missing_nodes = Vec::new();
|
||||||
|
for node in desired_nodes {
|
||||||
|
match node.chunk_exists(&task.key).await {
|
||||||
|
Ok(true) => present_nodes.push(node),
|
||||||
|
Ok(false) => missing_nodes.push(node),
|
||||||
|
Err(err) => {
|
||||||
|
warn!(
|
||||||
|
chunk_key = task.key,
|
||||||
|
node_id = node.node_id(),
|
||||||
|
error = ?err,
|
||||||
|
"Failed to inspect chunk during repair; treating replica as missing"
|
||||||
|
);
|
||||||
|
missing_nodes.push(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if missing_nodes.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
if present_nodes.is_empty() {
|
||||||
|
let desired_node_ids = missing_nodes
|
||||||
|
.iter()
|
||||||
|
.map(|node| node.node_id().to_string())
|
||||||
|
.collect::<std::collections::HashSet<_>>();
|
||||||
|
for node in healthy_nodes {
|
||||||
|
if desired_node_ids.contains(node.node_id()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match node.chunk_exists(&task.key).await {
|
||||||
|
Ok(true) => {
|
||||||
|
present_nodes.push(node);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Ok(false) => {}
|
||||||
|
Err(err) => {
|
||||||
|
warn!(
|
||||||
|
chunk_key = task.key,
|
||||||
|
node_id = node.node_id(),
|
||||||
|
error = ?err,
|
||||||
|
"Failed to inspect off-placement chunk during repair"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let source = present_nodes.first().ok_or_else(|| {
|
||||||
|
StorageError::Backend(format!(
|
||||||
|
"Cannot repair {} because no healthy source replica is available",
|
||||||
|
task.key
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let data = source
|
||||||
|
.get_chunk(&task.key, task.shard_index, false)
|
||||||
|
.await
|
||||||
|
.map_err(|err| {
|
||||||
|
StorageError::Backend(format!(
|
||||||
|
"Failed to load repair source for {} from {}: {}",
|
||||||
|
task.key,
|
||||||
|
source.node_id(),
|
||||||
|
err
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut repair_futures = FuturesUnordered::new();
|
||||||
|
for node in missing_nodes {
|
||||||
|
let node_id = node.node_id().to_string();
|
||||||
|
let key = task.key.clone();
|
||||||
|
let chunk = data.clone();
|
||||||
|
let shard_index = task.shard_index;
|
||||||
|
repair_futures.push(tokio::spawn(async move {
|
||||||
|
let result = node.put_chunk(&key, shard_index, false, chunk).await;
|
||||||
|
(node_id, result)
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut repaired = 0usize;
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
while let Some(result) = repair_futures.next().await {
|
||||||
|
match result {
|
||||||
|
Ok((_, Ok(()))) => repaired += 1,
|
||||||
|
Ok((node_id, Err(err))) => errors.push(format!("{node_id}: {err}")),
|
||||||
|
Err(join_err) => errors.push(format!("join error: {join_err}")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if errors.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(StorageError::Backend(format!(
|
||||||
|
"Repair for {} only restored {} replicas: {}",
|
||||||
|
task.key,
|
||||||
|
repaired,
|
||||||
|
errors.join(", ")
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn chunk_exists_anywhere(&self, key: &str) -> StorageResult<bool> {
|
||||||
|
let nodes = self
|
||||||
|
.node_registry
|
||||||
|
.get_all_nodes()
|
||||||
|
.await
|
||||||
|
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||||
|
|
||||||
|
for node in nodes {
|
||||||
|
match node.chunk_exists(key).await {
|
||||||
|
Ok(true) => return Ok(true),
|
||||||
|
Ok(false) => {}
|
||||||
|
Err(err) => {
|
||||||
|
warn!(
|
||||||
|
chunk_key = key,
|
||||||
|
node_id = node.node_id(),
|
||||||
|
error = ?err,
|
||||||
|
"Failed to inspect chunk while probing global existence"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
|
||||||
async fn read_chunked_object(
|
async fn read_chunked_object(
|
||||||
&self,
|
&self,
|
||||||
object_id: &ObjectId,
|
object_id: &ObjectId,
|
||||||
|
|
@ -521,24 +804,47 @@ impl ReplicatedBackend {
|
||||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||||
|
|
||||||
if !nodes.is_empty() {
|
if !nodes.is_empty() {
|
||||||
let mut ordered_nodes = Vec::with_capacity(nodes.len());
|
let preferred = self
|
||||||
if let Ok(preferred) = self
|
|
||||||
.node_selector
|
.node_selector
|
||||||
.select_for_read(&nodes, &Self::object_chunk_key(object_id, 0))
|
.select_for_read(&nodes, &Self::object_chunk_key(object_id, 0))
|
||||||
.await
|
.await
|
||||||
{
|
.ok();
|
||||||
ordered_nodes.push(preferred.clone());
|
let ordered_nodes = Self::ordered_read_nodes(&nodes, preferred);
|
||||||
}
|
|
||||||
for node in nodes {
|
if metadata.chunk_count > 1 {
|
||||||
if ordered_nodes
|
if let Some(local_node) = ordered_nodes.iter().find(|node| Self::is_local_node(node))
|
||||||
.iter()
|
|
||||||
.all(|existing| existing.node_id() != node.node_id())
|
|
||||||
{
|
{
|
||||||
ordered_nodes.push(node);
|
let batch_requests: Vec<(String, u32, bool)> = (0..metadata.chunk_count)
|
||||||
|
.map(|chunk_index| {
|
||||||
|
(
|
||||||
|
Self::object_chunk_key(object_id, chunk_index),
|
||||||
|
chunk_index as u32,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
match local_node.batch_get_chunks(batch_requests).await {
|
||||||
|
Ok(chunks) => {
|
||||||
|
return Self::assemble_chunked_bytes(
|
||||||
|
object_id,
|
||||||
|
metadata.original_size,
|
||||||
|
chunks,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
warn!(
|
||||||
|
object_id = %object_id,
|
||||||
|
node_id = local_node.node_id(),
|
||||||
|
error = ?err,
|
||||||
|
"Local replica batch read failed, falling back to distributed reads"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ordered_nodes.len() > 1 && metadata.chunk_count > 1 {
|
if ordered_nodes.len() > 1 && metadata.chunk_count > 1 && !Self::has_local_node(&ordered_nodes)
|
||||||
|
{
|
||||||
match self
|
match self
|
||||||
.read_chunked_object_from_distributed_batches(
|
.read_chunked_object_from_distributed_batches(
|
||||||
object_id,
|
object_id,
|
||||||
|
|
@ -783,6 +1089,74 @@ impl ReplicatedBackend {
|
||||||
combined.truncate(original_size);
|
combined.truncate(original_size);
|
||||||
Ok(combined.freeze())
|
Ok(combined.freeze())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn ordered_read_nodes(
|
||||||
|
nodes: &[Arc<dyn NodeClientTrait>],
|
||||||
|
preferred: Option<Arc<dyn NodeClientTrait>>,
|
||||||
|
) -> Vec<Arc<dyn NodeClientTrait>> {
|
||||||
|
let mut ordered = Vec::with_capacity(nodes.len());
|
||||||
|
|
||||||
|
if let Some(local) = nodes.iter().find(|node| Self::is_local_node(node)) {
|
||||||
|
ordered.push(local.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(preferred) = preferred {
|
||||||
|
if ordered
|
||||||
|
.iter()
|
||||||
|
.all(|existing| existing.node_id() != preferred.node_id())
|
||||||
|
{
|
||||||
|
ordered.push(preferred);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for node in nodes {
|
||||||
|
if ordered
|
||||||
|
.iter()
|
||||||
|
.all(|existing| existing.node_id() != node.node_id())
|
||||||
|
{
|
||||||
|
ordered.push(node.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ordered
|
||||||
|
}
|
||||||
|
|
||||||
|
fn has_local_node(nodes: &[Arc<dyn NodeClientTrait>]) -> bool {
|
||||||
|
nodes.iter().any(Self::is_local_node)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_local_node(node: &Arc<dyn NodeClientTrait>) -> bool {
|
||||||
|
Self::endpoint_is_local(node.endpoint())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn endpoint_is_local(endpoint: &str) -> bool {
|
||||||
|
let authority = endpoint
|
||||||
|
.split_once("://")
|
||||||
|
.map(|(_, rest)| rest)
|
||||||
|
.unwrap_or(endpoint)
|
||||||
|
.split('/')
|
||||||
|
.next()
|
||||||
|
.unwrap_or(endpoint);
|
||||||
|
let host = if authority.starts_with('[') {
|
||||||
|
authority
|
||||||
|
.split_once(']')
|
||||||
|
.map(|(host, _)| host.trim_start_matches('['))
|
||||||
|
.unwrap_or(authority.trim_matches(['[', ']']))
|
||||||
|
} else {
|
||||||
|
authority
|
||||||
|
.rsplit_once(':')
|
||||||
|
.map(|(host, _)| host)
|
||||||
|
.unwrap_or(authority)
|
||||||
|
};
|
||||||
|
|
||||||
|
if host.eq_ignore_ascii_case("localhost") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
host.parse::<IpAddr>()
|
||||||
|
.map(|ip| ip.is_loopback())
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
|
|
@ -908,12 +1282,25 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::config::RedundancyMode;
|
use crate::config::RedundancyMode;
|
||||||
use crate::node::{MockNodeRegistry, NodeError, NodeResult};
|
use crate::node::{MockNodeRegistry, NodeError, NodeResult};
|
||||||
|
use crate::repair::RepairQueue;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use tokio::time::sleep;
|
use tokio::time::sleep;
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
struct CapturingRepairQueue {
|
||||||
|
tasks: DashMap<String, ReplicatedRepairTask>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl RepairQueue for CapturingRepairQueue {
|
||||||
|
async fn enqueue_repair(&self, task: ReplicatedRepairTask) {
|
||||||
|
self.tasks.insert(task.id.clone(), task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct SlowNodeClient {
|
struct SlowNodeClient {
|
||||||
node_id: String,
|
node_id: String,
|
||||||
endpoint: String,
|
endpoint: String,
|
||||||
|
|
@ -1196,6 +1583,115 @@ mod tests {
|
||||||
assert!(result.is_err());
|
assert!(result.is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_under_replicated_write_enqueues_repair_task() {
|
||||||
|
let config = create_replicated_config(3);
|
||||||
|
let registry = Arc::new(MockNodeRegistry::with_nodes(3));
|
||||||
|
let nodes = registry.all_mock_nodes();
|
||||||
|
nodes[2].set_fail_puts(true);
|
||||||
|
|
||||||
|
let repair_queue = Arc::new(CapturingRepairQueue::default());
|
||||||
|
let backend = ReplicatedBackend::new_with_repair_queue(
|
||||||
|
config,
|
||||||
|
registry,
|
||||||
|
Some(repair_queue.clone()),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let object_id = ObjectId::new();
|
||||||
|
backend
|
||||||
|
.put_object(&object_id, Bytes::from_static(b"repair-me"))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let mut task = None;
|
||||||
|
for _ in 0..20 {
|
||||||
|
task = repair_queue
|
||||||
|
.tasks
|
||||||
|
.iter()
|
||||||
|
.next()
|
||||||
|
.map(|entry| entry.value().clone());
|
||||||
|
if task.is_some() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(10)).await;
|
||||||
|
}
|
||||||
|
let task = task.expect("repair task should be queued");
|
||||||
|
assert_eq!(task.key, ReplicatedBackend::object_key(&object_id));
|
||||||
|
assert_eq!(task.shard_index, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_repair_chunk_restores_missing_replica() {
|
||||||
|
let config = create_replicated_config(3);
|
||||||
|
let registry = Arc::new(MockNodeRegistry::with_nodes(3));
|
||||||
|
let nodes = registry.all_mock_nodes();
|
||||||
|
let backend = ReplicatedBackend::new(config, registry.clone())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let object_id = ObjectId::new();
|
||||||
|
let data = Bytes::from(vec![11u8; 128]);
|
||||||
|
backend.put_object(&object_id, data.clone()).await.unwrap();
|
||||||
|
|
||||||
|
let key = ReplicatedBackend::object_key(&object_id);
|
||||||
|
let mut missing = None;
|
||||||
|
for node in &nodes {
|
||||||
|
if node.chunk_exists(&key).await.unwrap() {
|
||||||
|
missing = Some(node.clone());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let missing = missing.expect("at least one replica should exist");
|
||||||
|
missing.delete_chunk(&key).await.unwrap();
|
||||||
|
assert!(!missing.chunk_exists(&key).await.unwrap());
|
||||||
|
|
||||||
|
let task = ReplicatedRepairTask::new(key.clone(), 0, "test");
|
||||||
|
backend.repair_chunk(&task).await.unwrap();
|
||||||
|
assert!(missing.chunk_exists(&key).await.unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_repair_chunk_can_source_from_off_placement_replica() {
|
||||||
|
let config = create_replicated_config(2);
|
||||||
|
let registry = Arc::new(MockNodeRegistry::with_nodes(3));
|
||||||
|
let nodes = registry.all_mock_nodes();
|
||||||
|
let backend = ReplicatedBackend::new(config, registry.clone())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let object_id = ObjectId::new();
|
||||||
|
let data = Bytes::from(vec![23u8; 128]);
|
||||||
|
backend.put_object(&object_id, data.clone()).await.unwrap();
|
||||||
|
|
||||||
|
let key = ReplicatedBackend::object_key(&object_id);
|
||||||
|
let desired_nodes = backend.select_replica_nodes_for_key(&key).await.unwrap();
|
||||||
|
assert_eq!(desired_nodes.len(), 2);
|
||||||
|
let off_placement = nodes
|
||||||
|
.iter()
|
||||||
|
.find(|node| {
|
||||||
|
desired_nodes
|
||||||
|
.iter()
|
||||||
|
.all(|desired| desired.node_id() != node.node_id())
|
||||||
|
})
|
||||||
|
.cloned()
|
||||||
|
.expect("off-placement node should exist");
|
||||||
|
|
||||||
|
let source_bytes = desired_nodes[0].get_chunk(&key, 0, false).await.unwrap();
|
||||||
|
off_placement.put_chunk(&key, 0, false, source_bytes).await.unwrap();
|
||||||
|
for node in &desired_nodes {
|
||||||
|
node.delete_chunk(&key).await.unwrap();
|
||||||
|
assert!(!node.chunk_exists(&key).await.unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
let task = ReplicatedRepairTask::new(key.clone(), 0, "off-placement-source");
|
||||||
|
backend.repair_chunk(&task).await.unwrap();
|
||||||
|
for node in &desired_nodes {
|
||||||
|
assert!(node.chunk_exists(&key).await.unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_replicated_backend_returns_after_quorum_without_waiting_for_slow_replica() {
|
async fn test_replicated_backend_returns_after_quorum_without_waiting_for_slow_replica() {
|
||||||
let config = create_replicated_config(3);
|
let config = create_replicated_config(3);
|
||||||
|
|
@ -1333,6 +1829,43 @@ mod tests {
|
||||||
.is_none());
|
.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_replicated_backend_prefers_local_replica_for_chunked_reads() {
|
||||||
|
let mut config = create_replicated_config(3);
|
||||||
|
config.chunk.chunk_size = 64;
|
||||||
|
let local = Arc::new(crate::node::MockNodeClient::new(
|
||||||
|
"local",
|
||||||
|
"http://127.0.0.1:9002",
|
||||||
|
));
|
||||||
|
let slow_a = Arc::new(SlowNodeClient::new(
|
||||||
|
"slow-a",
|
||||||
|
"http://slow-a:9002",
|
||||||
|
Duration::from_millis(250),
|
||||||
|
));
|
||||||
|
let slow_b = Arc::new(SlowNodeClient::new(
|
||||||
|
"slow-b",
|
||||||
|
"http://slow-b:9002",
|
||||||
|
Duration::from_millis(250),
|
||||||
|
));
|
||||||
|
let registry = Arc::new(FixedNodeRegistry {
|
||||||
|
nodes: vec![slow_a.clone(), slow_b.clone(), local.clone()],
|
||||||
|
});
|
||||||
|
|
||||||
|
let backend = ReplicatedBackend::new(config, registry).await.unwrap();
|
||||||
|
let object_id = ObjectId::new();
|
||||||
|
let data = Bytes::from(vec![5u8; 256]);
|
||||||
|
|
||||||
|
backend.put_object(&object_id, data.clone()).await.unwrap();
|
||||||
|
|
||||||
|
let started = Instant::now();
|
||||||
|
let retrieved = backend.get_object(&object_id).await.unwrap();
|
||||||
|
let elapsed = started.elapsed();
|
||||||
|
|
||||||
|
assert_eq!(retrieved, data);
|
||||||
|
assert!(elapsed < Duration::from_millis(150), "elapsed={elapsed:?}");
|
||||||
|
assert!(local.get_count() >= 4);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_replicated_backend_object_size() {
|
async fn test_replicated_backend_object_size() {
|
||||||
let config = create_replicated_config(3);
|
let config = create_replicated_config(3);
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
|
|
||||||
use crate::config::ChunkConfig;
|
use crate::config::ChunkConfig;
|
||||||
|
|
||||||
|
const TARGET_CHUNK_COUNT_PER_OBJECT: usize = 8;
|
||||||
|
|
||||||
/// Manages chunk operations for large objects
|
/// Manages chunk operations for large objects
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ChunkManager {
|
pub struct ChunkManager {
|
||||||
|
|
@ -27,18 +29,42 @@ impl ChunkManager {
|
||||||
self.config.chunk_size
|
self.config.chunk_size
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Choose the effective chunk size for an object of the given size.
|
||||||
|
///
|
||||||
|
/// Small objects keep the configured default chunk size. Larger objects
|
||||||
|
/// scale up to keep per-object chunk counts bounded without exceeding the
|
||||||
|
/// configured maximum.
|
||||||
|
pub fn effective_chunk_size(&self, total_size: usize) -> usize {
|
||||||
|
if total_size == 0 {
|
||||||
|
return self.config.chunk_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
let min_chunk_size = self.config.min_chunk_size.min(self.config.chunk_size).max(1);
|
||||||
|
let max_chunk_size = self.config.max_chunk_size.max(self.config.chunk_size);
|
||||||
|
let required = total_size.div_ceil(TARGET_CHUNK_COUNT_PER_OBJECT);
|
||||||
|
let alignment = min_chunk_size;
|
||||||
|
let aligned_required = required.div_ceil(alignment) * alignment;
|
||||||
|
|
||||||
|
aligned_required
|
||||||
|
.max(self.config.chunk_size)
|
||||||
|
.clamp(min_chunk_size, max_chunk_size)
|
||||||
|
}
|
||||||
|
|
||||||
/// Split data into chunks
|
/// Split data into chunks
|
||||||
///
|
///
|
||||||
/// Returns a vector of chunks. Each chunk is at most `chunk_size` bytes,
|
/// Returns a vector of chunks. Each chunk is at most `chunk_size` bytes,
|
||||||
/// except the last chunk which may be smaller.
|
/// except the last chunk which may be smaller.
|
||||||
pub fn split(&self, data: &[u8]) -> Vec<Vec<u8>> {
|
pub fn split(&self, data: &[u8]) -> Vec<Vec<u8>> {
|
||||||
|
self.split_with_chunk_size(data, self.config.chunk_size)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Split data into chunks using an explicit chunk size.
|
||||||
|
pub fn split_with_chunk_size(&self, data: &[u8], chunk_size: usize) -> Vec<Vec<u8>> {
|
||||||
if data.is_empty() {
|
if data.is_empty() {
|
||||||
return vec![vec![]];
|
return vec![vec![]];
|
||||||
}
|
}
|
||||||
|
|
||||||
data.chunks(self.config.chunk_size)
|
data.chunks(chunk_size).map(|c| c.to_vec()).collect()
|
||||||
.map(|c| c.to_vec())
|
|
||||||
.collect()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reassemble chunks into original data
|
/// Reassemble chunks into original data
|
||||||
|
|
@ -50,21 +76,33 @@ impl ChunkManager {
|
||||||
|
|
||||||
/// Calculate the number of chunks for a given data size
|
/// Calculate the number of chunks for a given data size
|
||||||
pub fn chunk_count(&self, size: usize) -> usize {
|
pub fn chunk_count(&self, size: usize) -> usize {
|
||||||
|
Self::chunk_count_for_size(size, self.config.chunk_size)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn chunk_count_for_size(size: usize, chunk_size: usize) -> usize {
|
||||||
if size == 0 {
|
if size == 0 {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
(size + self.config.chunk_size - 1) / self.config.chunk_size
|
size.div_ceil(chunk_size)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the size of a specific chunk
|
/// Calculate the size of a specific chunk
|
||||||
///
|
///
|
||||||
/// Returns the size of the chunk at the given index for data of the given total size.
|
/// Returns the size of the chunk at the given index for data of the given total size.
|
||||||
pub fn chunk_size_at(&self, total_size: usize, chunk_index: usize) -> usize {
|
pub fn chunk_size_at(&self, total_size: usize, chunk_index: usize) -> usize {
|
||||||
let full_chunks = total_size / self.config.chunk_size;
|
Self::chunk_size_at_for_size(total_size, chunk_index, self.config.chunk_size)
|
||||||
let remainder = total_size % self.config.chunk_size;
|
}
|
||||||
|
|
||||||
|
pub fn chunk_size_at_for_size(
|
||||||
|
total_size: usize,
|
||||||
|
chunk_index: usize,
|
||||||
|
chunk_size: usize,
|
||||||
|
) -> usize {
|
||||||
|
let full_chunks = total_size / chunk_size;
|
||||||
|
let remainder = total_size % chunk_size;
|
||||||
|
|
||||||
if chunk_index < full_chunks {
|
if chunk_index < full_chunks {
|
||||||
self.config.chunk_size
|
chunk_size
|
||||||
} else if chunk_index == full_chunks && remainder > 0 {
|
} else if chunk_index == full_chunks && remainder > 0 {
|
||||||
remainder
|
remainder
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -76,8 +114,16 @@ impl ChunkManager {
|
||||||
///
|
///
|
||||||
/// Returns (start_offset, length) for the chunk at the given index.
|
/// Returns (start_offset, length) for the chunk at the given index.
|
||||||
pub fn chunk_range(&self, total_size: usize, chunk_index: usize) -> (usize, usize) {
|
pub fn chunk_range(&self, total_size: usize, chunk_index: usize) -> (usize, usize) {
|
||||||
let start = chunk_index * self.config.chunk_size;
|
Self::chunk_range_for_size(total_size, chunk_index, self.config.chunk_size)
|
||||||
let length = self.chunk_size_at(total_size, chunk_index);
|
}
|
||||||
|
|
||||||
|
pub fn chunk_range_for_size(
|
||||||
|
total_size: usize,
|
||||||
|
chunk_index: usize,
|
||||||
|
chunk_size: usize,
|
||||||
|
) -> (usize, usize) {
|
||||||
|
let start = chunk_index * chunk_size;
|
||||||
|
let length = Self::chunk_size_at_for_size(total_size, chunk_index, chunk_size);
|
||||||
(start, length)
|
(start, length)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -257,6 +303,15 @@ mod tests {
|
||||||
assert_eq!(manager.chunk_range(2500, 2), (2048, 452));
|
assert_eq!(manager.chunk_range(2500, 2), (2048, 452));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_effective_chunk_size_scales_large_objects_up_to_target_chunk_count() {
|
||||||
|
let manager = ChunkManager::default();
|
||||||
|
|
||||||
|
assert_eq!(manager.effective_chunk_size(4 * 1024 * 1024), 8 * 1024 * 1024);
|
||||||
|
assert_eq!(manager.effective_chunk_size(256 * 1024 * 1024), 32 * 1024 * 1024);
|
||||||
|
assert_eq!(manager.effective_chunk_size(1024 * 1024 * 1024), 64 * 1024 * 1024);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_chunk_id_to_key() {
|
fn test_chunk_id_to_key() {
|
||||||
let id = ChunkId::data_shard("obj123", 0, 2);
|
let id = ChunkId::data_shard("obj123", 0, 2);
|
||||||
|
|
|
||||||
|
|
@ -65,12 +65,14 @@ pub mod config;
|
||||||
pub mod erasure;
|
pub mod erasure;
|
||||||
pub mod node;
|
pub mod node;
|
||||||
pub mod placement;
|
pub mod placement;
|
||||||
|
pub mod repair;
|
||||||
|
|
||||||
// Re-export commonly used types
|
// Re-export commonly used types
|
||||||
pub use backends::{ErasureCodedBackend, ReplicatedBackend};
|
pub use backends::{ErasureCodedBackend, ReplicatedBackend};
|
||||||
pub use config::{BucketStorageConfig, ChunkConfig, DistributedConfig, RedundancyMode};
|
pub use config::{BucketStorageConfig, ChunkConfig, DistributedConfig, RedundancyMode};
|
||||||
pub use node::{MockNodeClient, MockNodeRegistry, NodeRegistry, StaticNodeRegistry};
|
pub use node::{MockNodeClient, MockNodeRegistry, NodeRegistry, StaticNodeRegistry};
|
||||||
pub use placement::{ConsistentHashSelector, NodeSelector, RandomSelector, RoundRobinSelector};
|
pub use placement::{ConsistentHashSelector, NodeSelector, RandomSelector, RoundRobinSelector};
|
||||||
|
pub use repair::{RepairQueue, ReplicatedRepairTask};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
|
||||||
58
lightningstor/crates/lightningstor-distributed/src/repair.rs
Normal file
58
lightningstor/crates/lightningstor-distributed/src/repair.rs
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct ReplicatedRepairTask {
|
||||||
|
pub id: String,
|
||||||
|
pub key: String,
|
||||||
|
pub shard_index: u32,
|
||||||
|
pub reason: String,
|
||||||
|
pub enqueued_at_millis: u64,
|
||||||
|
#[serde(default)]
|
||||||
|
pub attempt_count: u32,
|
||||||
|
#[serde(default)]
|
||||||
|
pub last_error: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub next_attempt_after_millis: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReplicatedRepairTask {
|
||||||
|
pub fn new(key: impl Into<String>, shard_index: u32, reason: impl Into<String>) -> Self {
|
||||||
|
let key = key.into();
|
||||||
|
let now = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.as_millis() as u64;
|
||||||
|
Self {
|
||||||
|
id: format!("replicated::{key}::{shard_index}"),
|
||||||
|
key,
|
||||||
|
shard_index,
|
||||||
|
reason: reason.into(),
|
||||||
|
enqueued_at_millis: now,
|
||||||
|
attempt_count: 0,
|
||||||
|
last_error: None,
|
||||||
|
next_attempt_after_millis: now,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn schedule_retry(&mut self, error: impl Into<String>, backoff_millis: u64) {
|
||||||
|
let now = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.as_millis() as u64;
|
||||||
|
self.attempt_count = self.attempt_count.saturating_add(1);
|
||||||
|
self.last_error = Some(error.into());
|
||||||
|
self.next_attempt_after_millis = now.saturating_add(backoff_millis);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_due(&self, now_millis: u64) -> bool {
|
||||||
|
now_millis >= self.next_attempt_after_millis
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait RepairQueue: Send + Sync {
|
||||||
|
async fn enqueue_repair(&self, task: ReplicatedRepairTask);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -1,13 +1,18 @@
|
||||||
//! Local chunk storage
|
//! Local chunk storage
|
||||||
|
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
|
use std::collections::hash_map::DefaultHasher;
|
||||||
|
use std::hash::{Hash, Hasher};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
|
const WRITE_LOCK_STRIPES: usize = 256;
|
||||||
|
|
||||||
/// Errors from chunk storage operations
|
/// Errors from chunk storage operations
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum StorageError {
|
pub enum StorageError {
|
||||||
|
|
@ -45,6 +50,12 @@ pub struct LocalChunkStore {
|
||||||
|
|
||||||
/// Whether writes should be flushed before they are acknowledged.
|
/// Whether writes should be flushed before they are acknowledged.
|
||||||
sync_on_write: bool,
|
sync_on_write: bool,
|
||||||
|
|
||||||
|
/// Monotonic nonce for per-write temporary paths.
|
||||||
|
temp_file_nonce: AtomicU64,
|
||||||
|
|
||||||
|
/// Striped per-chunk write/delete locks to keep same-key updates coherent.
|
||||||
|
write_locks: Vec<Mutex<()>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LocalChunkStore {
|
impl LocalChunkStore {
|
||||||
|
|
@ -65,6 +76,8 @@ impl LocalChunkStore {
|
||||||
max_capacity,
|
max_capacity,
|
||||||
chunk_count: AtomicU64::new(0),
|
chunk_count: AtomicU64::new(0),
|
||||||
sync_on_write,
|
sync_on_write,
|
||||||
|
temp_file_nonce: AtomicU64::new(0),
|
||||||
|
write_locks: (0..WRITE_LOCK_STRIPES).map(|_| Mutex::new(())).collect(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Scan existing chunks
|
// Scan existing chunks
|
||||||
|
|
@ -91,7 +104,7 @@ impl LocalChunkStore {
|
||||||
|
|
||||||
if metadata.is_file() {
|
if metadata.is_file() {
|
||||||
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
||||||
if name.ends_with(".tmp") {
|
if name.ends_with(".tmp") || name.starts_with(".tmp.") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -131,6 +144,25 @@ impl LocalChunkStore {
|
||||||
self.data_dir.join(safe_id)
|
self.data_dir.join(safe_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn temporary_chunk_path(&self, path: &std::path::Path) -> PathBuf {
|
||||||
|
let nonce = self.temp_file_nonce.fetch_add(1, Ordering::Relaxed);
|
||||||
|
let pid = std::process::id();
|
||||||
|
let file_name = path
|
||||||
|
.file_name()
|
||||||
|
.and_then(|name| name.to_str())
|
||||||
|
.unwrap_or("chunk");
|
||||||
|
path.parent()
|
||||||
|
.unwrap_or(&self.data_dir)
|
||||||
|
.join(format!(".tmp.{file_name}.{pid}.{nonce}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_lock(&self, chunk_id: &str) -> &Mutex<()> {
|
||||||
|
let mut hasher = DefaultHasher::new();
|
||||||
|
chunk_id.hash(&mut hasher);
|
||||||
|
let slot = (hasher.finish() as usize) % self.write_locks.len().max(1);
|
||||||
|
&self.write_locks[slot]
|
||||||
|
}
|
||||||
|
|
||||||
async fn resolve_existing_chunk_path(&self, chunk_id: &str) -> StorageResult<PathBuf> {
|
async fn resolve_existing_chunk_path(&self, chunk_id: &str) -> StorageResult<PathBuf> {
|
||||||
if let Some(path) = self.chunk_paths.get(chunk_id) {
|
if let Some(path) = self.chunk_paths.get(chunk_id) {
|
||||||
return Ok(path.clone());
|
return Ok(path.clone());
|
||||||
|
|
@ -154,6 +186,7 @@ impl LocalChunkStore {
|
||||||
|
|
||||||
/// Store a chunk
|
/// Store a chunk
|
||||||
pub async fn put(&self, chunk_id: &str, data: &[u8]) -> StorageResult<u64> {
|
pub async fn put(&self, chunk_id: &str, data: &[u8]) -> StorageResult<u64> {
|
||||||
|
let _guard = self.write_lock(chunk_id).lock().await;
|
||||||
let size = data.len() as u64;
|
let size = data.len() as u64;
|
||||||
|
|
||||||
// Check if replacing existing chunk
|
// Check if replacing existing chunk
|
||||||
|
|
@ -169,7 +202,7 @@ impl LocalChunkStore {
|
||||||
}
|
}
|
||||||
|
|
||||||
let path = self.chunk_path(chunk_id);
|
let path = self.chunk_path(chunk_id);
|
||||||
let temp_path = path.with_extension(".tmp");
|
let temp_path = self.temporary_chunk_path(&path);
|
||||||
if let Some(parent) = path.parent() {
|
if let Some(parent) = path.parent() {
|
||||||
// Multipart uploads fan out concurrent writes into the same shard
|
// Multipart uploads fan out concurrent writes into the same shard
|
||||||
// directory. Create the parent path unconditionally so no writer can
|
// directory. Create the parent path unconditionally so no writer can
|
||||||
|
|
@ -217,6 +250,7 @@ impl LocalChunkStore {
|
||||||
|
|
||||||
/// Delete a chunk
|
/// Delete a chunk
|
||||||
pub async fn delete(&self, chunk_id: &str) -> StorageResult<()> {
|
pub async fn delete(&self, chunk_id: &str) -> StorageResult<()> {
|
||||||
|
let _guard = self.write_lock(chunk_id).lock().await;
|
||||||
if let Some((_, size)) = self.chunk_sizes.remove(chunk_id) {
|
if let Some((_, size)) = self.chunk_sizes.remove(chunk_id) {
|
||||||
let path = match self.chunk_paths.remove(chunk_id) {
|
let path = match self.chunk_paths.remove(chunk_id) {
|
||||||
Some((_, path)) => path,
|
Some((_, path)) => path,
|
||||||
|
|
@ -421,4 +455,34 @@ mod tests {
|
||||||
|
|
||||||
assert_eq!(store.chunk_count(), 16);
|
assert_eq!(store.chunk_count(), 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_concurrent_rewrites_same_chunk_use_unique_temp_paths() {
|
||||||
|
let (store, _temp) = create_test_store().await;
|
||||||
|
let store = Arc::new(store);
|
||||||
|
let barrier = Arc::new(Barrier::new(9));
|
||||||
|
let mut tasks = Vec::new();
|
||||||
|
|
||||||
|
for idx in 0..8u8 {
|
||||||
|
let store = Arc::clone(&store);
|
||||||
|
let barrier = Arc::clone(&barrier);
|
||||||
|
tasks.push(tokio::spawn(async move {
|
||||||
|
let payload = vec![idx; 2048];
|
||||||
|
barrier.wait().await;
|
||||||
|
store.put("shared-chunk", &payload).await.unwrap();
|
||||||
|
payload
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
barrier.wait().await;
|
||||||
|
|
||||||
|
let mut expected_payloads = Vec::new();
|
||||||
|
for task in tasks {
|
||||||
|
expected_payloads.push(task.await.unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
let stored = store.get("shared-chunk").await.unwrap();
|
||||||
|
assert!(expected_payloads.iter().any(|payload| payload == &stored));
|
||||||
|
assert_eq!(store.chunk_count(), 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ lightningstor-distributed = { workspace = true }
|
||||||
lightningstor-storage = { workspace = true }
|
lightningstor-storage = { workspace = true }
|
||||||
chainfire-client = { path = "../../../chainfire/chainfire-client" }
|
chainfire-client = { path = "../../../chainfire/chainfire-client" }
|
||||||
flaredb-client = { path = "../../../flaredb/crates/flaredb-client" }
|
flaredb-client = { path = "../../../flaredb/crates/flaredb-client" }
|
||||||
|
iam-api = { path = "../../../iam/crates/iam-api" }
|
||||||
iam-service-auth = { path = "../../../iam/crates/iam-service-auth" }
|
iam-service-auth = { path = "../../../iam/crates/iam-service-auth" }
|
||||||
tonic = { workspace = true }
|
tonic = { workspace = true }
|
||||||
tonic-health = { workspace = true }
|
tonic-health = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,11 @@ mod bucket_service;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod metadata;
|
pub mod metadata;
|
||||||
mod object_service;
|
mod object_service;
|
||||||
|
pub mod repair;
|
||||||
pub mod s3;
|
pub mod s3;
|
||||||
|
pub mod tenant;
|
||||||
|
|
||||||
pub use bucket_service::BucketServiceImpl;
|
pub use bucket_service::BucketServiceImpl;
|
||||||
pub use config::ServerConfig;
|
pub use config::ServerConfig;
|
||||||
pub use object_service::ObjectServiceImpl;
|
pub use object_service::ObjectServiceImpl;
|
||||||
|
pub use repair::{MetadataRepairQueue, spawn_replicated_repair_worker};
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,13 @@ use clap::Parser;
|
||||||
use iam_service_auth::AuthService;
|
use iam_service_auth::AuthService;
|
||||||
use lightningstor_api::{BucketServiceServer, ObjectServiceServer};
|
use lightningstor_api::{BucketServiceServer, ObjectServiceServer};
|
||||||
use lightningstor_distributed::{
|
use lightningstor_distributed::{
|
||||||
DistributedConfig, ErasureCodedBackend, RedundancyMode, ReplicatedBackend, StaticNodeRegistry,
|
DistributedConfig, ErasureCodedBackend, RedundancyMode, ReplicatedBackend, RepairQueue,
|
||||||
|
StaticNodeRegistry,
|
||||||
};
|
};
|
||||||
use lightningstor_server::{
|
use lightningstor_server::{
|
||||||
config::{MetadataBackend, ObjectStorageBackend},
|
config::{MetadataBackend, ObjectStorageBackend},
|
||||||
metadata::MetadataStore,
|
metadata::MetadataStore,
|
||||||
|
repair::{spawn_replicated_repair_worker, MetadataRepairQueue},
|
||||||
s3, BucketServiceImpl, ObjectServiceImpl, ServerConfig,
|
s3, BucketServiceImpl, ObjectServiceImpl, ServerConfig,
|
||||||
};
|
};
|
||||||
use lightningstor_storage::{LocalFsBackend, StorageBackend};
|
use lightningstor_storage::{LocalFsBackend, StorageBackend};
|
||||||
|
|
@ -28,6 +30,12 @@ const OBJECT_GRPC_INITIAL_STREAM_WINDOW: u32 = 64 * 1024 * 1024;
|
||||||
const OBJECT_GRPC_INITIAL_CONNECTION_WINDOW: u32 = 512 * 1024 * 1024;
|
const OBJECT_GRPC_INITIAL_CONNECTION_WINDOW: u32 = 512 * 1024 * 1024;
|
||||||
const OBJECT_GRPC_KEEPALIVE_INTERVAL: Duration = Duration::from_secs(30);
|
const OBJECT_GRPC_KEEPALIVE_INTERVAL: Duration = Duration::from_secs(30);
|
||||||
const OBJECT_GRPC_KEEPALIVE_TIMEOUT: Duration = Duration::from_secs(10);
|
const OBJECT_GRPC_KEEPALIVE_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
|
const REPLICATED_REPAIR_SCAN_INTERVAL: Duration = Duration::from_secs(5);
|
||||||
|
|
||||||
|
struct StorageRuntime {
|
||||||
|
backend: Arc<dyn StorageBackend>,
|
||||||
|
repair_worker: Option<tokio::task::JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
/// LightningStor object storage server
|
/// LightningStor object storage server
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
|
|
@ -148,8 +156,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
metrics_addr
|
metrics_addr
|
||||||
);
|
);
|
||||||
|
|
||||||
let storage = create_storage_backend(&config).await?;
|
|
||||||
|
|
||||||
if let Some(endpoint) = &config.chainfire_endpoint {
|
if let Some(endpoint) = &config.chainfire_endpoint {
|
||||||
tracing::info!(" Cluster coordination: ChainFire @ {}", endpoint);
|
tracing::info!(" Cluster coordination: ChainFire @ {}", endpoint);
|
||||||
let endpoint = endpoint.clone();
|
let endpoint = endpoint.clone();
|
||||||
|
|
@ -204,6 +210,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let storage_runtime = create_storage_backend(&config, metadata.clone()).await?;
|
||||||
|
let storage = storage_runtime.backend.clone();
|
||||||
|
let _repair_worker = storage_runtime.repair_worker;
|
||||||
|
|
||||||
// Initialize IAM authentication service
|
// Initialize IAM authentication service
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"Connecting to IAM server at {}",
|
"Connecting to IAM server at {}",
|
||||||
|
|
@ -253,7 +263,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let s3_addr: SocketAddr = config.s3_addr;
|
let s3_addr: SocketAddr = config.s3_addr;
|
||||||
|
|
||||||
// Start S3 HTTP server with shared state
|
// Start S3 HTTP server with shared state
|
||||||
let s3_router = s3::create_router_with_state(storage.clone(), metadata.clone());
|
let s3_router = s3::create_router_with_auth(
|
||||||
|
storage.clone(),
|
||||||
|
metadata.clone(),
|
||||||
|
Some(config.auth.iam_server_addr.clone()),
|
||||||
|
);
|
||||||
let s3_server = tokio::spawn(async move {
|
let s3_server = tokio::spawn(async move {
|
||||||
tracing::info!("S3 HTTP server listening on {}", s3_addr);
|
tracing::info!("S3 HTTP server listening on {}", s3_addr);
|
||||||
let listener = tokio::net::TcpListener::bind(s3_addr).await.unwrap();
|
let listener = tokio::net::TcpListener::bind(s3_addr).await.unwrap();
|
||||||
|
|
@ -422,24 +436,27 @@ async fn register_chainfire_membership(
|
||||||
|
|
||||||
async fn create_storage_backend(
|
async fn create_storage_backend(
|
||||||
config: &ServerConfig,
|
config: &ServerConfig,
|
||||||
) -> Result<Arc<dyn StorageBackend>, Box<dyn std::error::Error>> {
|
metadata: Arc<MetadataStore>,
|
||||||
|
) -> Result<StorageRuntime, Box<dyn std::error::Error>> {
|
||||||
match config.object_storage_backend {
|
match config.object_storage_backend {
|
||||||
ObjectStorageBackend::LocalFs => {
|
ObjectStorageBackend::LocalFs => {
|
||||||
tracing::info!("Object storage backend: local_fs");
|
tracing::info!("Object storage backend: local_fs");
|
||||||
Ok(Arc::new(
|
Ok(StorageRuntime {
|
||||||
LocalFsBackend::new(&config.data_dir, config.sync_on_write).await?,
|
backend: Arc::new(LocalFsBackend::new(&config.data_dir, config.sync_on_write).await?),
|
||||||
))
|
repair_worker: None,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
ObjectStorageBackend::Distributed => {
|
ObjectStorageBackend::Distributed => {
|
||||||
tracing::info!("Object storage backend: distributed");
|
tracing::info!("Object storage backend: distributed");
|
||||||
create_distributed_storage_backend(&config.distributed).await
|
create_distributed_storage_backend(&config.distributed, metadata).await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn create_distributed_storage_backend(
|
async fn create_distributed_storage_backend(
|
||||||
config: &DistributedConfig,
|
config: &DistributedConfig,
|
||||||
) -> Result<Arc<dyn StorageBackend>, Box<dyn std::error::Error>> {
|
metadata: Arc<MetadataStore>,
|
||||||
|
) -> Result<StorageRuntime, Box<dyn std::error::Error>> {
|
||||||
let endpoints: Vec<String> = config
|
let endpoints: Vec<String> = config
|
||||||
.node_endpoints
|
.node_endpoints
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -501,9 +518,25 @@ async fn create_distributed_storage_backend(
|
||||||
write_quorum,
|
write_quorum,
|
||||||
"Using replicated LightningStor storage backend"
|
"Using replicated LightningStor storage backend"
|
||||||
);
|
);
|
||||||
Ok(Arc::new(
|
let repair_queue: Arc<dyn RepairQueue> =
|
||||||
ReplicatedBackend::new(config.clone(), registry).await?,
|
Arc::new(MetadataRepairQueue::new(metadata.clone()));
|
||||||
))
|
let backend = Arc::new(
|
||||||
|
ReplicatedBackend::new_with_repair_queue(
|
||||||
|
config.clone(),
|
||||||
|
registry,
|
||||||
|
Some(repair_queue),
|
||||||
|
)
|
||||||
|
.await?,
|
||||||
|
);
|
||||||
|
let repair_worker = Some(spawn_replicated_repair_worker(
|
||||||
|
metadata,
|
||||||
|
backend.clone(),
|
||||||
|
REPLICATED_REPAIR_SCAN_INTERVAL,
|
||||||
|
));
|
||||||
|
Ok(StorageRuntime {
|
||||||
|
backend,
|
||||||
|
repair_worker,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
RedundancyMode::ErasureCoded {
|
RedundancyMode::ErasureCoded {
|
||||||
data_shards,
|
data_shards,
|
||||||
|
|
@ -514,9 +547,10 @@ async fn create_distributed_storage_backend(
|
||||||
parity_shards,
|
parity_shards,
|
||||||
"Using erasure-coded LightningStor storage backend"
|
"Using erasure-coded LightningStor storage backend"
|
||||||
);
|
);
|
||||||
Ok(Arc::new(
|
Ok(StorageRuntime {
|
||||||
ErasureCodedBackend::new(config.clone(), registry).await?,
|
backend: Arc::new(ErasureCodedBackend::new(config.clone(), registry).await?),
|
||||||
))
|
repair_worker: None,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
RedundancyMode::None => Err(std::io::Error::other(
|
RedundancyMode::None => Err(std::io::Error::other(
|
||||||
"distributed object storage does not support redundancy.type=none; use object_storage_backend=local_fs instead",
|
"distributed object storage does not support redundancy.type=none; use object_storage_backend=local_fs instead",
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
use flaredb_client::RdbClient;
|
use flaredb_client::RdbClient;
|
||||||
|
use lightningstor_distributed::ReplicatedRepairTask;
|
||||||
use lightningstor_types::{Bucket, BucketId, MultipartUpload, Object, ObjectId, Result};
|
use lightningstor_types::{Bucket, BucketId, MultipartUpload, Object, ObjectId, Result};
|
||||||
use serde_json;
|
use serde_json;
|
||||||
use sqlx::pool::PoolOptions;
|
use sqlx::pool::PoolOptions;
|
||||||
|
|
@ -215,6 +216,12 @@ impl MetadataStore {
|
||||||
end_key
|
end_key
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn exclusive_scan_start(key: &[u8]) -> Vec<u8> {
|
||||||
|
let mut next = key.to_vec();
|
||||||
|
next.push(0);
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
fn flaredb_client_for_key<'a>(
|
fn flaredb_client_for_key<'a>(
|
||||||
clients: &'a [Arc<Mutex<RdbClient>>],
|
clients: &'a [Arc<Mutex<RdbClient>>],
|
||||||
key: &[u8],
|
key: &[u8],
|
||||||
|
|
@ -422,6 +429,56 @@ impl MetadataStore {
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn flaredb_scan_page(
|
||||||
|
clients: &[Arc<Mutex<RdbClient>>],
|
||||||
|
prefix: &[u8],
|
||||||
|
start_after: Option<&[u8]>,
|
||||||
|
limit: u32,
|
||||||
|
) -> Result<(Vec<(String, String)>, bool)> {
|
||||||
|
let end_key = Self::prefix_end(prefix);
|
||||||
|
let start_key = start_after
|
||||||
|
.map(Self::exclusive_scan_start)
|
||||||
|
.unwrap_or_else(|| prefix.to_vec());
|
||||||
|
let fetch_limit = limit.saturating_add(1).max(1);
|
||||||
|
let client = Self::flaredb_scan_client(clients);
|
||||||
|
let (mut items, next) = match {
|
||||||
|
let mut c = client.lock().await;
|
||||||
|
c.raw_scan(start_key.clone(), end_key.clone(), fetch_limit).await
|
||||||
|
} {
|
||||||
|
Ok((keys, values, next)) => {
|
||||||
|
let items = keys
|
||||||
|
.into_iter()
|
||||||
|
.zip(values.into_iter())
|
||||||
|
.map(|(key, value)| {
|
||||||
|
(
|
||||||
|
String::from_utf8_lossy(&key).to_string(),
|
||||||
|
String::from_utf8_lossy(&value).to_string(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
(items, next)
|
||||||
|
}
|
||||||
|
Err(status) if Self::flaredb_requires_strong(&status) => {
|
||||||
|
Self::flaredb_scan_strong(client, &start_key, &end_key, fetch_limit).await?
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
return Err(lightningstor_types::Error::StorageError(format!(
|
||||||
|
"FlareDB scan failed: {}",
|
||||||
|
error
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let has_more = if items.len() > limit as usize {
|
||||||
|
items.truncate(limit as usize);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
next.is_some()
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((items, has_more))
|
||||||
|
}
|
||||||
|
|
||||||
async fn flaredb_has_prefix(clients: &[Arc<Mutex<RdbClient>>], prefix: &[u8]) -> Result<bool> {
|
async fn flaredb_has_prefix(clients: &[Arc<Mutex<RdbClient>>], prefix: &[u8]) -> Result<bool> {
|
||||||
let end_key = Self::prefix_end(prefix);
|
let end_key = Self::prefix_end(prefix);
|
||||||
let client = Self::flaredb_scan_client(clients);
|
let client = Self::flaredb_scan_client(clients);
|
||||||
|
|
@ -613,11 +670,146 @@ impl MetadataStore {
|
||||||
results.push((entry.key().clone(), entry.value().clone()));
|
results.push((entry.key().clone(), entry.value().clone()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
results.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_prefix_page(
|
||||||
|
&self,
|
||||||
|
prefix: &str,
|
||||||
|
start_after: Option<&str>,
|
||||||
|
limit: u32,
|
||||||
|
) -> Result<(Vec<(String, String)>, bool)> {
|
||||||
|
if limit == 0 {
|
||||||
|
return Ok((Vec::new(), false));
|
||||||
|
}
|
||||||
|
|
||||||
|
match &self.backend {
|
||||||
|
StorageBackend::FlareDB(client) => {
|
||||||
|
Self::flaredb_scan_page(
|
||||||
|
client,
|
||||||
|
prefix.as_bytes(),
|
||||||
|
start_after.map(str::as_bytes),
|
||||||
|
limit,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
StorageBackend::Sql(sql) => {
|
||||||
|
let prefix_end = String::from_utf8(Self::prefix_end(prefix.as_bytes())).map_err(|e| {
|
||||||
|
lightningstor_types::Error::StorageError(format!(
|
||||||
|
"Failed to encode prefix end: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
let fetch_limit = (limit.saturating_add(1)) as i64;
|
||||||
|
match sql {
|
||||||
|
SqlStorageBackend::Postgres(pool) => {
|
||||||
|
let rows: Vec<(String, String)> = if let Some(after) = start_after {
|
||||||
|
sqlx::query_as(
|
||||||
|
"SELECT key, value FROM metadata_kv
|
||||||
|
WHERE key >= $1 AND key < $2 AND key > $3
|
||||||
|
ORDER BY key
|
||||||
|
LIMIT $4",
|
||||||
|
)
|
||||||
|
.bind(prefix)
|
||||||
|
.bind(&prefix_end)
|
||||||
|
.bind(after)
|
||||||
|
.bind(fetch_limit)
|
||||||
|
.fetch_all(pool.as_ref())
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
lightningstor_types::Error::StorageError(format!(
|
||||||
|
"Postgres paged scan failed: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?
|
||||||
|
} else {
|
||||||
|
sqlx::query_as(
|
||||||
|
"SELECT key, value FROM metadata_kv
|
||||||
|
WHERE key >= $1 AND key < $2
|
||||||
|
ORDER BY key
|
||||||
|
LIMIT $3",
|
||||||
|
)
|
||||||
|
.bind(prefix)
|
||||||
|
.bind(&prefix_end)
|
||||||
|
.bind(fetch_limit)
|
||||||
|
.fetch_all(pool.as_ref())
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
lightningstor_types::Error::StorageError(format!(
|
||||||
|
"Postgres paged scan failed: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?
|
||||||
|
};
|
||||||
|
let has_more = rows.len() > limit as usize;
|
||||||
|
let items = rows.into_iter().take(limit as usize).collect();
|
||||||
|
Ok((items, has_more))
|
||||||
|
}
|
||||||
|
SqlStorageBackend::Sqlite(pool) => {
|
||||||
|
let rows: Vec<(String, String)> = if let Some(after) = start_after {
|
||||||
|
sqlx::query_as(
|
||||||
|
"SELECT key, value FROM metadata_kv
|
||||||
|
WHERE key >= ?1 AND key < ?2 AND key > ?3
|
||||||
|
ORDER BY key
|
||||||
|
LIMIT ?4",
|
||||||
|
)
|
||||||
|
.bind(prefix)
|
||||||
|
.bind(&prefix_end)
|
||||||
|
.bind(after)
|
||||||
|
.bind(fetch_limit)
|
||||||
|
.fetch_all(pool.as_ref())
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
lightningstor_types::Error::StorageError(format!(
|
||||||
|
"SQLite paged scan failed: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?
|
||||||
|
} else {
|
||||||
|
sqlx::query_as(
|
||||||
|
"SELECT key, value FROM metadata_kv
|
||||||
|
WHERE key >= ?1 AND key < ?2
|
||||||
|
ORDER BY key
|
||||||
|
LIMIT ?3",
|
||||||
|
)
|
||||||
|
.bind(prefix)
|
||||||
|
.bind(&prefix_end)
|
||||||
|
.bind(fetch_limit)
|
||||||
|
.fetch_all(pool.as_ref())
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
lightningstor_types::Error::StorageError(format!(
|
||||||
|
"SQLite paged scan failed: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?
|
||||||
|
};
|
||||||
|
let has_more = rows.len() > limit as usize;
|
||||||
|
let items = rows.into_iter().take(limit as usize).collect();
|
||||||
|
Ok((items, has_more))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
StorageBackend::InMemory(map) => {
|
||||||
|
let mut rows: Vec<(String, String)> = map
|
||||||
|
.iter()
|
||||||
|
.filter(|entry| entry.key().starts_with(prefix))
|
||||||
|
.map(|entry| (entry.key().clone(), entry.value().clone()))
|
||||||
|
.collect();
|
||||||
|
rows.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
|
||||||
|
if let Some(after) = start_after {
|
||||||
|
rows.retain(|(key, _)| key.as_str() > after);
|
||||||
|
}
|
||||||
|
let has_more = rows.len() > limit as usize;
|
||||||
|
let items = rows.into_iter().take(limit as usize).collect();
|
||||||
|
Ok((items, has_more))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Internal: check if any key exists with a prefix
|
/// Internal: check if any key exists with a prefix
|
||||||
async fn has_prefix(&self, prefix: &str) -> Result<bool> {
|
async fn has_prefix(&self, prefix: &str) -> Result<bool> {
|
||||||
match &self.backend {
|
match &self.backend {
|
||||||
|
|
@ -708,10 +900,64 @@ impl MetadataStore {
|
||||||
"/lightningstor/multipart/uploads/"
|
"/lightningstor/multipart/uploads/"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn multipart_bucket_key(bucket_id: &str, object_key: &str, upload_id: &str) -> String {
|
||||||
|
format!(
|
||||||
|
"/lightningstor/multipart/by-bucket/{}/{}/{}",
|
||||||
|
bucket_id, object_key, upload_id
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn multipart_bucket_prefix(bucket_id: &BucketId, prefix: &str) -> String {
|
||||||
|
format!("/lightningstor/multipart/by-bucket/{}/{}", bucket_id, prefix)
|
||||||
|
}
|
||||||
|
|
||||||
fn multipart_object_key(object_id: &ObjectId) -> String {
|
fn multipart_object_key(object_id: &ObjectId) -> String {
|
||||||
format!("/lightningstor/multipart/objects/{}", object_id)
|
format!("/lightningstor/multipart/objects/{}", object_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn replicated_repair_task_key(task_id: &str) -> String {
|
||||||
|
format!("/lightningstor/repair/replicated/{}", task_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn replicated_repair_task_prefix() -> &'static str {
|
||||||
|
"/lightningstor/repair/replicated/"
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn save_replicated_repair_task(&self, task: &ReplicatedRepairTask) -> Result<()> {
|
||||||
|
let key = Self::replicated_repair_task_key(&task.id);
|
||||||
|
let value = serde_json::to_string(task).map_err(|e| {
|
||||||
|
lightningstor_types::Error::StorageError(format!(
|
||||||
|
"Failed to serialize replicated repair task: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
self.put(&key, &value).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn list_replicated_repair_tasks(
|
||||||
|
&self,
|
||||||
|
limit: u32,
|
||||||
|
) -> Result<Vec<ReplicatedRepairTask>> {
|
||||||
|
let (items, _) = self
|
||||||
|
.get_prefix_page(Self::replicated_repair_task_prefix(), None, limit)
|
||||||
|
.await?;
|
||||||
|
let mut tasks = Vec::new();
|
||||||
|
for (_, value) in items {
|
||||||
|
let task: ReplicatedRepairTask = serde_json::from_str(&value).map_err(|e| {
|
||||||
|
lightningstor_types::Error::StorageError(format!(
|
||||||
|
"Failed to deserialize replicated repair task: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
tasks.push(task);
|
||||||
|
}
|
||||||
|
Ok(tasks)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn delete_replicated_repair_task(&self, task_id: &str) -> Result<()> {
|
||||||
|
self.delete_key(&Self::replicated_repair_task_key(task_id)).await
|
||||||
|
}
|
||||||
|
|
||||||
/// Save bucket metadata
|
/// Save bucket metadata
|
||||||
pub async fn save_bucket(&self, bucket: &Bucket) -> Result<()> {
|
pub async fn save_bucket(&self, bucket: &Bucket) -> Result<()> {
|
||||||
let key = Self::bucket_key(&bucket.org_id, &bucket.project_id, bucket.name.as_str());
|
let key = Self::bucket_key(&bucket.org_id, &bucket.project_id, bucket.name.as_str());
|
||||||
|
|
@ -900,6 +1146,13 @@ impl MetadataStore {
|
||||||
prefix: &str,
|
prefix: &str,
|
||||||
max_keys: u32,
|
max_keys: u32,
|
||||||
) -> Result<Vec<Object>> {
|
) -> Result<Vec<Object>> {
|
||||||
|
if max_keys > 0 {
|
||||||
|
return self
|
||||||
|
.list_objects_page(bucket_id, prefix, None, max_keys)
|
||||||
|
.await
|
||||||
|
.map(|(objects, _)| objects);
|
||||||
|
}
|
||||||
|
|
||||||
let prefix_key = Self::object_prefix(bucket_id, prefix);
|
let prefix_key = Self::object_prefix(bucket_id, prefix);
|
||||||
|
|
||||||
let items = self.get_prefix(&prefix_key).await?;
|
let items = self.get_prefix(&prefix_key).await?;
|
||||||
|
|
@ -921,6 +1174,34 @@ impl MetadataStore {
|
||||||
Ok(objects)
|
Ok(objects)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn list_objects_page(
|
||||||
|
&self,
|
||||||
|
bucket_id: &BucketId,
|
||||||
|
prefix: &str,
|
||||||
|
start_after_key: Option<&str>,
|
||||||
|
max_keys: u32,
|
||||||
|
) -> Result<(Vec<Object>, bool)> {
|
||||||
|
if max_keys == 0 {
|
||||||
|
return Ok((Vec::new(), false));
|
||||||
|
}
|
||||||
|
|
||||||
|
let prefix_key = Self::object_prefix(bucket_id, prefix);
|
||||||
|
let start_after_storage_key =
|
||||||
|
start_after_key.map(|key| Self::object_key(bucket_id, key, None));
|
||||||
|
let (items, has_more) = self
|
||||||
|
.get_prefix_page(&prefix_key, start_after_storage_key.as_deref(), max_keys)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let mut objects = Vec::new();
|
||||||
|
for (_, value) in items {
|
||||||
|
if let Ok(object) = serde_json::from_str::<Object>(&value) {
|
||||||
|
objects.push(object);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((objects, has_more))
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn save_multipart_upload(&self, upload: &MultipartUpload) -> Result<()> {
|
pub async fn save_multipart_upload(&self, upload: &MultipartUpload) -> Result<()> {
|
||||||
let key = Self::multipart_upload_key(upload.upload_id.as_str());
|
let key = Self::multipart_upload_key(upload.upload_id.as_str());
|
||||||
let value = serde_json::to_string(upload).map_err(|e| {
|
let value = serde_json::to_string(upload).map_err(|e| {
|
||||||
|
|
@ -929,7 +1210,16 @@ impl MetadataStore {
|
||||||
e
|
e
|
||||||
))
|
))
|
||||||
})?;
|
})?;
|
||||||
self.put(&key, &value).await
|
self.put(&key, &value).await?;
|
||||||
|
self.put(
|
||||||
|
&Self::multipart_bucket_key(
|
||||||
|
&upload.bucket_id,
|
||||||
|
upload.key.as_str(),
|
||||||
|
upload.upload_id.as_str(),
|
||||||
|
),
|
||||||
|
&value,
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn load_multipart_upload(&self, upload_id: &str) -> Result<Option<MultipartUpload>> {
|
pub async fn load_multipart_upload(&self, upload_id: &str) -> Result<Option<MultipartUpload>> {
|
||||||
|
|
@ -948,6 +1238,14 @@ impl MetadataStore {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn delete_multipart_upload(&self, upload_id: &str) -> Result<()> {
|
pub async fn delete_multipart_upload(&self, upload_id: &str) -> Result<()> {
|
||||||
|
if let Some(upload) = self.load_multipart_upload(upload_id).await? {
|
||||||
|
self.delete_key(&Self::multipart_bucket_key(
|
||||||
|
&upload.bucket_id,
|
||||||
|
upload.key.as_str(),
|
||||||
|
upload.upload_id.as_str(),
|
||||||
|
))
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
self.delete_key(&Self::multipart_upload_key(upload_id)).await
|
self.delete_key(&Self::multipart_upload_key(upload_id)).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -957,14 +1255,30 @@ impl MetadataStore {
|
||||||
prefix: &str,
|
prefix: &str,
|
||||||
max_uploads: u32,
|
max_uploads: u32,
|
||||||
) -> Result<Vec<MultipartUpload>> {
|
) -> Result<Vec<MultipartUpload>> {
|
||||||
let items = self.get_prefix(Self::multipart_upload_prefix()).await?;
|
let index_prefix = Self::multipart_bucket_prefix(bucket_id, prefix);
|
||||||
|
let items = if max_uploads > 0 {
|
||||||
|
self.get_prefix_page(&index_prefix, None, max_uploads)
|
||||||
|
.await?
|
||||||
|
.0
|
||||||
|
} else {
|
||||||
|
self.get_prefix(&index_prefix).await?
|
||||||
|
};
|
||||||
let mut uploads = Vec::new();
|
let mut uploads = Vec::new();
|
||||||
for (_, value) in items {
|
for (_, value) in items {
|
||||||
if let Ok(upload) = serde_json::from_str::<MultipartUpload>(&value) {
|
if let Ok(upload) = serde_json::from_str::<MultipartUpload>(&value) {
|
||||||
if upload.bucket_id == bucket_id.to_string()
|
uploads.push(upload);
|
||||||
&& upload.key.as_str().starts_with(prefix)
|
}
|
||||||
{
|
}
|
||||||
uploads.push(upload);
|
|
||||||
|
if uploads.is_empty() {
|
||||||
|
let fallback_items = self.get_prefix(Self::multipart_upload_prefix()).await?;
|
||||||
|
for (_, value) in fallback_items {
|
||||||
|
if let Ok(upload) = serde_json::from_str::<MultipartUpload>(&value) {
|
||||||
|
if upload.bucket_id == bucket_id.to_string()
|
||||||
|
&& upload.key.as_str().starts_with(prefix)
|
||||||
|
{
|
||||||
|
uploads.push(upload);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1033,6 +1347,7 @@ fn normalize_transport_addr(endpoint: &str) -> String {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use lightningstor_distributed::ReplicatedRepairTask;
|
||||||
use lightningstor_types::{BucketName, ETag, ObjectKey};
|
use lightningstor_types::{BucketName, ETag, ObjectKey};
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
|
|
@ -1119,4 +1434,123 @@ mod tests {
|
||||||
.is_none()
|
.is_none()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn list_objects_page_honors_start_after_and_has_more() {
|
||||||
|
let store = MetadataStore::new_in_memory();
|
||||||
|
let bucket = Bucket::new(
|
||||||
|
BucketName::new("paged-bucket").unwrap(),
|
||||||
|
"org-a",
|
||||||
|
"project-a",
|
||||||
|
"default",
|
||||||
|
);
|
||||||
|
store.save_bucket(&bucket).await.unwrap();
|
||||||
|
|
||||||
|
for key in ["a.txt", "b.txt", "c.txt"] {
|
||||||
|
let mut object = Object::new(
|
||||||
|
bucket.id.to_string(),
|
||||||
|
ObjectKey::new(key).unwrap(),
|
||||||
|
ETag::from_md5(&[7u8; 16]),
|
||||||
|
128,
|
||||||
|
Some("text/plain".to_string()),
|
||||||
|
);
|
||||||
|
object.version = lightningstor_types::ObjectVersion::null();
|
||||||
|
store.save_object(&object).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let (first_page, first_has_more) = store
|
||||||
|
.list_objects_page(&bucket.id, "", None, 2)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
first_page
|
||||||
|
.iter()
|
||||||
|
.map(|object| object.key.as_str().to_string())
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
vec!["a.txt".to_string(), "b.txt".to_string()]
|
||||||
|
);
|
||||||
|
assert!(first_has_more);
|
||||||
|
|
||||||
|
let (second_page, second_has_more) = store
|
||||||
|
.list_objects_page(&bucket.id, "", Some("b.txt"), 2)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
second_page
|
||||||
|
.iter()
|
||||||
|
.map(|object| object.key.as_str().to_string())
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
vec!["c.txt".to_string()]
|
||||||
|
);
|
||||||
|
assert!(!second_has_more);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn list_multipart_uploads_uses_bucket_prefix_index() {
|
||||||
|
let store = MetadataStore::new_in_memory();
|
||||||
|
let bucket = Bucket::new(
|
||||||
|
BucketName::new("multipart-bucket").unwrap(),
|
||||||
|
"org-a",
|
||||||
|
"project-a",
|
||||||
|
"default",
|
||||||
|
);
|
||||||
|
store.save_bucket(&bucket).await.unwrap();
|
||||||
|
|
||||||
|
let upload_a = MultipartUpload::new(bucket.id.to_string(), ObjectKey::new("a/one.bin").unwrap());
|
||||||
|
let upload_b = MultipartUpload::new(bucket.id.to_string(), ObjectKey::new("a/two.bin").unwrap());
|
||||||
|
let other_bucket = Bucket::new(
|
||||||
|
BucketName::new("other-bucket").unwrap(),
|
||||||
|
"org-a",
|
||||||
|
"project-a",
|
||||||
|
"default",
|
||||||
|
);
|
||||||
|
store.save_bucket(&other_bucket).await.unwrap();
|
||||||
|
let upload_other =
|
||||||
|
MultipartUpload::new(other_bucket.id.to_string(), ObjectKey::new("a/three.bin").unwrap());
|
||||||
|
|
||||||
|
store.save_multipart_upload(&upload_a).await.unwrap();
|
||||||
|
store.save_multipart_upload(&upload_b).await.unwrap();
|
||||||
|
store.save_multipart_upload(&upload_other).await.unwrap();
|
||||||
|
|
||||||
|
let uploads = store
|
||||||
|
.list_multipart_uploads(&bucket.id, "a/", 10)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(uploads.len(), 2);
|
||||||
|
assert_eq!(
|
||||||
|
uploads
|
||||||
|
.iter()
|
||||||
|
.map(|upload| upload.key.as_str().to_string())
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
vec!["a/one.bin".to_string(), "a/two.bin".to_string()]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn replicated_repair_tasks_round_trip() {
|
||||||
|
let store = MetadataStore::new_in_memory();
|
||||||
|
let mut task = ReplicatedRepairTask::new("obj_abc", 0, "quorum write");
|
||||||
|
store.save_replicated_repair_task(&task).await.unwrap();
|
||||||
|
|
||||||
|
let tasks = store.list_replicated_repair_tasks(10).await.unwrap();
|
||||||
|
assert_eq!(tasks.len(), 1);
|
||||||
|
assert_eq!(tasks[0].key, "obj_abc");
|
||||||
|
|
||||||
|
task.schedule_retry("transient failure", 5_000);
|
||||||
|
store.save_replicated_repair_task(&task).await.unwrap();
|
||||||
|
|
||||||
|
let tasks = store.list_replicated_repair_tasks(10).await.unwrap();
|
||||||
|
assert_eq!(tasks[0].attempt_count, 1);
|
||||||
|
assert_eq!(tasks[0].last_error.as_deref(), Some("transient failure"));
|
||||||
|
|
||||||
|
store
|
||||||
|
.delete_replicated_repair_task(&task.id)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(store
|
||||||
|
.list_replicated_repair_tasks(10)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.is_empty());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -155,6 +155,10 @@ impl ObjectServiceImpl {
|
||||||
.await
|
.await
|
||||||
.map_err(|e| Status::internal(format!("Failed to delete multipart part: {}", e)))?;
|
.map_err(|e| Status::internal(format!("Failed to delete multipart part: {}", e)))?;
|
||||||
}
|
}
|
||||||
|
self.storage
|
||||||
|
.delete_upload_parts(upload.upload_id.as_str())
|
||||||
|
.await
|
||||||
|
.map_err(|e| Status::internal(format!("Failed to clean multipart upload: {}", e)))?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -465,17 +469,15 @@ impl ObjectService for ObjectServiceImpl {
|
||||||
let (start, end) =
|
let (start, end) =
|
||||||
Self::resolve_range(object.size as usize, req.range_start, req.range_end);
|
Self::resolve_range(object.size as usize, req.range_start, req.range_end);
|
||||||
|
|
||||||
if object.etag.is_multipart() {
|
if let Some(upload) = self
|
||||||
if let Some(upload) = self
|
.metadata
|
||||||
.metadata
|
.load_object_multipart_upload(&object.id)
|
||||||
.load_object_multipart_upload(&object.id)
|
.await
|
||||||
.await
|
.map_err(Self::to_status)?
|
||||||
.map_err(Self::to_status)?
|
{
|
||||||
{
|
return Ok(Response::new(
|
||||||
return Ok(Response::new(
|
self.multipart_object_stream(&object, upload, start, end),
|
||||||
self.multipart_object_stream(&object, upload, start, end),
|
));
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let data = self
|
let data = self
|
||||||
|
|
@ -524,28 +526,21 @@ impl ObjectService for ObjectServiceImpl {
|
||||||
.map_err(Self::to_status)?
|
.map_err(Self::to_status)?
|
||||||
.ok_or_else(|| Status::not_found(format!("Object {} not found", req.key)))?;
|
.ok_or_else(|| Status::not_found(format!("Object {} not found", req.key)))?;
|
||||||
|
|
||||||
if object.etag.is_multipart() {
|
if let Some(upload) = self
|
||||||
if let Some(upload) = self
|
.metadata
|
||||||
.metadata
|
.load_object_multipart_upload(&object.id)
|
||||||
.load_object_multipart_upload(&object.id)
|
.await
|
||||||
|
.map_err(Self::to_status)?
|
||||||
|
{
|
||||||
|
self.delete_multipart_parts(&upload).await?;
|
||||||
|
self.metadata
|
||||||
|
.delete_object_multipart_upload(&object.id)
|
||||||
.await
|
.await
|
||||||
.map_err(Self::to_status)?
|
.map_err(Self::to_status)?;
|
||||||
{
|
self.metadata
|
||||||
self.delete_multipart_parts(&upload).await?;
|
.delete_multipart_upload(upload.upload_id.as_str())
|
||||||
self.metadata
|
.await
|
||||||
.delete_object_multipart_upload(&object.id)
|
.map_err(Self::to_status)?;
|
||||||
.await
|
|
||||||
.map_err(Self::to_status)?;
|
|
||||||
self.metadata
|
|
||||||
.delete_multipart_upload(upload.upload_id.as_str())
|
|
||||||
.await
|
|
||||||
.map_err(Self::to_status)?;
|
|
||||||
} else {
|
|
||||||
self.storage
|
|
||||||
.delete_object(&object.id)
|
|
||||||
.await
|
|
||||||
.map_err(|e| Status::internal(format!("Failed to delete object: {}", e)))?;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
self.storage
|
self.storage
|
||||||
.delete_object(&object.id)
|
.delete_object(&object.id)
|
||||||
|
|
|
||||||
182
lightningstor/crates/lightningstor-server/src/repair.rs
Normal file
182
lightningstor/crates/lightningstor-server/src/repair.rs
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
use crate::metadata::MetadataStore;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use lightningstor_distributed::{RepairQueue, ReplicatedBackend, ReplicatedRepairTask};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
|
use tokio::task::JoinHandle;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
|
const REPAIR_SCAN_LIMIT: u32 = 256;
|
||||||
|
const REPAIR_BACKOFF_BASE_MILLIS: u64 = 1_000;
|
||||||
|
const REPAIR_BACKOFF_MAX_MILLIS: u64 = 60_000;
|
||||||
|
const ORPHAN_REPAIR_DROP_ATTEMPTS: u32 = 8;
|
||||||
|
|
||||||
|
pub struct MetadataRepairQueue {
|
||||||
|
metadata: Arc<MetadataStore>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MetadataRepairQueue {
|
||||||
|
pub fn new(metadata: Arc<MetadataStore>) -> Self {
|
||||||
|
Self { metadata }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl RepairQueue for MetadataRepairQueue {
|
||||||
|
async fn enqueue_repair(&self, task: ReplicatedRepairTask) {
|
||||||
|
if let Err(error) = self.metadata.save_replicated_repair_task(&task).await {
|
||||||
|
warn!(
|
||||||
|
task_id = task.id,
|
||||||
|
chunk_key = task.key,
|
||||||
|
error = %error,
|
||||||
|
"failed to persist replicated repair task"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn spawn_replicated_repair_worker(
|
||||||
|
metadata: Arc<MetadataStore>,
|
||||||
|
backend: Arc<ReplicatedBackend>,
|
||||||
|
interval: Duration,
|
||||||
|
) -> JoinHandle<()> {
|
||||||
|
tokio::spawn(async move {
|
||||||
|
loop {
|
||||||
|
if let Err(error) = process_replicated_repair_queue(&metadata, &backend).await {
|
||||||
|
if replicated_repair_queue_transiently_unready(&error) {
|
||||||
|
debug!(error = %error, "replicated repair queue pass deferred until metadata becomes ready");
|
||||||
|
} else {
|
||||||
|
warn!(error = %error, "replicated repair queue pass failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(interval).await;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn process_replicated_repair_queue(
|
||||||
|
metadata: &MetadataStore,
|
||||||
|
backend: &ReplicatedBackend,
|
||||||
|
) -> Result<(), lightningstor_types::Error> {
|
||||||
|
let now = unix_time_millis();
|
||||||
|
let tasks = metadata
|
||||||
|
.list_replicated_repair_tasks(REPAIR_SCAN_LIMIT)
|
||||||
|
.await?;
|
||||||
|
for mut task in tasks {
|
||||||
|
if !task.is_due(now) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match backend.repair_chunk(&task).await {
|
||||||
|
Ok(()) => {
|
||||||
|
metadata.delete_replicated_repair_task(&task.id).await?;
|
||||||
|
debug!(
|
||||||
|
task_id = task.id,
|
||||||
|
chunk_key = task.key,
|
||||||
|
"repaired replicated chunk"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
if task.attempt_count >= ORPHAN_REPAIR_DROP_ATTEMPTS {
|
||||||
|
match backend.chunk_exists_anywhere(&task.key).await {
|
||||||
|
Ok(false) => {
|
||||||
|
warn!(
|
||||||
|
task_id = task.id,
|
||||||
|
chunk_key = task.key,
|
||||||
|
attempts = task.attempt_count,
|
||||||
|
"dropping orphan replicated repair task with no remaining source replica"
|
||||||
|
);
|
||||||
|
metadata.delete_replicated_repair_task(&task.id).await?;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Ok(true) => {}
|
||||||
|
Err(probe_error) => {
|
||||||
|
warn!(
|
||||||
|
task_id = task.id,
|
||||||
|
chunk_key = task.key,
|
||||||
|
error = %probe_error,
|
||||||
|
"failed to probe global chunk existence while evaluating orphan repair task"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let backoff = repair_backoff_millis(task.attempt_count);
|
||||||
|
task.schedule_retry(error.to_string(), backoff);
|
||||||
|
metadata.save_replicated_repair_task(&task).await?;
|
||||||
|
warn!(
|
||||||
|
task_id = task.id,
|
||||||
|
chunk_key = task.key,
|
||||||
|
attempts = task.attempt_count,
|
||||||
|
backoff_millis = backoff,
|
||||||
|
error = %error,
|
||||||
|
"replicated chunk repair failed"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unix_time_millis() -> u64 {
|
||||||
|
SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.as_millis() as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
fn repair_backoff_millis(attempt_count: u32) -> u64 {
|
||||||
|
let exponent = attempt_count.min(6);
|
||||||
|
let multiplier = 1u64 << exponent;
|
||||||
|
(REPAIR_BACKOFF_BASE_MILLIS.saturating_mul(multiplier)).min(REPAIR_BACKOFF_MAX_MILLIS)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn replicated_repair_queue_transiently_unready(error: &lightningstor_types::Error) -> bool {
|
||||||
|
let rendered = error.to_string().to_ascii_lowercase();
|
||||||
|
let transient = rendered.contains("region not found")
|
||||||
|
|| rendered.contains("status: notfound")
|
||||||
|
|| rendered.contains("metadata backend not ready")
|
||||||
|
|| rendered.contains("notleader");
|
||||||
|
if transient {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
match error {
|
||||||
|
lightningstor_types::Error::StorageError(message)
|
||||||
|
| lightningstor_types::Error::Internal(message) => {
|
||||||
|
let message = message.to_ascii_lowercase();
|
||||||
|
message.contains("region not found")
|
||||||
|
|| message.contains("status: notfound")
|
||||||
|
|| message.contains("metadata backend not ready")
|
||||||
|
|| message.contains("notleader")
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::replicated_repair_queue_transiently_unready;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn treats_region_not_found_as_transient_startup_state() {
|
||||||
|
let error = lightningstor_types::Error::StorageError(
|
||||||
|
"FlareDB scan failed: status: NotFound, message: \"region not found\"".to_string(),
|
||||||
|
);
|
||||||
|
assert!(replicated_repair_queue_transiently_unready(&error));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn treats_wrapped_storage_error_rendering_as_transient_startup_state() {
|
||||||
|
let error = lightningstor_types::Error::StorageError(
|
||||||
|
"FlareDB scan failed: status: NotFound, message: \"region not found\", details: [], metadata: MetadataMap { headers: {} }".to_string(),
|
||||||
|
);
|
||||||
|
assert!(replicated_repair_queue_transiently_unready(&error));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keeps_real_repair_failures_as_warnings() {
|
||||||
|
let error =
|
||||||
|
lightningstor_types::Error::StorageError("replication checksum mismatch".to_string());
|
||||||
|
assert!(!replicated_repair_queue_transiently_unready(&error));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -10,13 +10,17 @@ use axum::{
|
||||||
middleware::Next,
|
middleware::Next,
|
||||||
response::{IntoResponse, Response},
|
response::{IntoResponse, Response},
|
||||||
};
|
};
|
||||||
|
use crate::tenant::TenantContext;
|
||||||
use hmac::{Hmac, Mac};
|
use hmac::{Hmac, Mac};
|
||||||
|
use iam_api::proto::{iam_credential_client::IamCredentialClient, GetSecretKeyRequest};
|
||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::{Mutex, RwLock};
|
||||||
|
use tonic::transport::Channel;
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
use url::form_urlencoded;
|
use url::form_urlencoded;
|
||||||
|
use std::time::{Duration as StdDuration, Instant};
|
||||||
|
|
||||||
type HmacSha256 = Hmac<Sha256>;
|
type HmacSha256 = Hmac<Sha256>;
|
||||||
const DEFAULT_MAX_AUTH_BODY_BYTES: usize = 1024 * 1024 * 1024;
|
const DEFAULT_MAX_AUTH_BODY_BYTES: usize = 1024 * 1024 * 1024;
|
||||||
|
|
@ -27,6 +31,13 @@ pub(crate) struct VerifiedBodyBytes(pub Bytes);
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub(crate) struct VerifiedPayloadHash(pub String);
|
pub(crate) struct VerifiedPayloadHash(pub String);
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
|
pub(crate) struct VerifiedTenantContext(pub TenantContext);
|
||||||
|
|
||||||
|
fn should_buffer_auth_body(payload_hash_header: Option<&str>) -> bool {
|
||||||
|
payload_hash_header.is_none()
|
||||||
|
}
|
||||||
|
|
||||||
/// SigV4 authentication state
|
/// SigV4 authentication state
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct AuthState {
|
pub struct AuthState {
|
||||||
|
|
@ -40,21 +51,73 @@ pub struct AuthState {
|
||||||
aws_service: String,
|
aws_service: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Placeholder IAM client (will integrate with real IAM later)
|
|
||||||
pub struct IamClient {
|
pub struct IamClient {
|
||||||
// Stores access_key_id -> secret_key mapping
|
mode: IamClientMode,
|
||||||
credentials: std::collections::HashMap<String, String>,
|
credential_cache: Arc<RwLock<HashMap<String, CachedCredential>>>,
|
||||||
|
cache_ttl: StdDuration,
|
||||||
|
}
|
||||||
|
|
||||||
|
enum IamClientMode {
|
||||||
|
Env {
|
||||||
|
credentials: std::collections::HashMap<String, String>,
|
||||||
|
},
|
||||||
|
Grpc {
|
||||||
|
endpoint: String,
|
||||||
|
channel: Arc<Mutex<Option<Channel>>>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
|
pub(crate) struct ResolvedCredential {
|
||||||
|
pub secret_key: String,
|
||||||
|
pub principal_id: String,
|
||||||
|
pub org_id: Option<String>,
|
||||||
|
pub project_id: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CachedCredential {
|
||||||
|
credential: ResolvedCredential,
|
||||||
|
cached_at: Instant,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IamClient {
|
impl IamClient {
|
||||||
/// Create a new IamClient loading credentials from environment variables for MVP.
|
/// Create a new IAM client. If an endpoint is supplied, use the IAM gRPC API.
|
||||||
|
pub fn new(iam_endpoint: Option<String>) -> Self {
|
||||||
|
let cache_ttl = std::env::var("LIGHTNINGSTOR_S3_IAM_CACHE_TTL_SECS")
|
||||||
|
.ok()
|
||||||
|
.and_then(|value| value.parse::<u64>().ok())
|
||||||
|
.map(StdDuration::from_secs)
|
||||||
|
.unwrap_or_else(|| StdDuration::from_secs(30));
|
||||||
|
|
||||||
|
if let Some(endpoint) = iam_endpoint
|
||||||
|
.map(|value| normalize_iam_endpoint(&value))
|
||||||
|
.filter(|value| !value.is_empty())
|
||||||
|
{
|
||||||
|
return Self {
|
||||||
|
mode: IamClientMode::Grpc {
|
||||||
|
endpoint,
|
||||||
|
channel: Arc::new(Mutex::new(None)),
|
||||||
|
},
|
||||||
|
credential_cache: Arc::new(RwLock::new(HashMap::new())),
|
||||||
|
cache_ttl,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Self {
|
||||||
|
mode: IamClientMode::Env {
|
||||||
|
credentials: Self::load_env_credentials(),
|
||||||
|
},
|
||||||
|
credential_cache: Arc::new(RwLock::new(HashMap::new())),
|
||||||
|
cache_ttl,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load credentials from environment variables for fallback/testing.
|
||||||
///
|
///
|
||||||
/// Supports two formats:
|
/// Supports two formats:
|
||||||
/// 1. Single credential: S3_ACCESS_KEY_ID + S3_SECRET_KEY
|
/// 1. Single credential: S3_ACCESS_KEY_ID + S3_SECRET_KEY
|
||||||
/// 2. Multiple credentials: S3_CREDENTIALS="key1:secret1,key2:secret2,..."
|
/// 2. Multiple credentials: S3_CREDENTIALS="key1:secret1,key2:secret2,..."
|
||||||
///
|
fn load_env_credentials() -> std::collections::HashMap<String, String> {
|
||||||
/// TODO: Replace with proper IAM gRPC integration (see T060)
|
|
||||||
pub fn new() -> Self {
|
|
||||||
let mut credentials = std::collections::HashMap::new();
|
let mut credentials = std::collections::HashMap::new();
|
||||||
|
|
||||||
// Option 1: Multiple credentials via S3_CREDENTIALS
|
// Option 1: Multiple credentials via S3_CREDENTIALS
|
||||||
|
|
@ -87,28 +150,160 @@ impl IamClient {
|
||||||
warn!("Set S3_CREDENTIALS or S3_ACCESS_KEY_ID/S3_SECRET_KEY to enable access.");
|
warn!("Set S3_CREDENTIALS or S3_ACCESS_KEY_ID/S3_SECRET_KEY to enable access.");
|
||||||
}
|
}
|
||||||
|
|
||||||
Self { credentials }
|
credentials
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Validate access key and return secret key
|
#[cfg(test)]
|
||||||
pub async fn get_secret_key(&self, access_key_id: &str) -> Result<String, String> {
|
fn env_credentials(&self) -> Option<&std::collections::HashMap<String, String>> {
|
||||||
self.credentials
|
match &self.mode {
|
||||||
.get(access_key_id)
|
IamClientMode::Env { credentials } => Some(credentials),
|
||||||
.cloned()
|
IamClientMode::Grpc { .. } => None,
|
||||||
.ok_or_else(|| "Access key ID not found".to_string())
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn env_default_tenant() -> (Option<String>, Option<String>) {
|
||||||
|
let org_id = std::env::var("S3_TENANT_ORG_ID")
|
||||||
|
.ok()
|
||||||
|
.or_else(|| std::env::var("S3_ORG_ID").ok())
|
||||||
|
.or_else(|| Some("default".to_string()));
|
||||||
|
let project_id = std::env::var("S3_TENANT_PROJECT_ID")
|
||||||
|
.ok()
|
||||||
|
.or_else(|| std::env::var("S3_PROJECT_ID").ok())
|
||||||
|
.or_else(|| Some("default".to_string()));
|
||||||
|
(org_id, project_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Validate access key and resolve the credential context.
|
||||||
|
pub async fn get_credential(&self, access_key_id: &str) -> Result<ResolvedCredential, String> {
|
||||||
|
match &self.mode {
|
||||||
|
IamClientMode::Env { credentials } => {
|
||||||
|
let secret_key = credentials
|
||||||
|
.get(access_key_id)
|
||||||
|
.cloned()
|
||||||
|
.ok_or_else(|| "Access key ID not found".to_string())?;
|
||||||
|
let (org_id, project_id) = Self::env_default_tenant();
|
||||||
|
Ok(ResolvedCredential {
|
||||||
|
secret_key,
|
||||||
|
principal_id: access_key_id.to_string(),
|
||||||
|
org_id,
|
||||||
|
project_id,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
IamClientMode::Grpc { endpoint, channel } => {
|
||||||
|
if let Some(credential) = self.cached_credential(access_key_id).await {
|
||||||
|
return Ok(credential);
|
||||||
|
}
|
||||||
|
|
||||||
|
let response = self
|
||||||
|
.grpc_get_secret_key(endpoint, channel, access_key_id)
|
||||||
|
.await?;
|
||||||
|
let response = response.into_inner();
|
||||||
|
let credential = ResolvedCredential {
|
||||||
|
secret_key: response.secret_key,
|
||||||
|
principal_id: response.principal_id,
|
||||||
|
org_id: response.org_id,
|
||||||
|
project_id: response.project_id,
|
||||||
|
};
|
||||||
|
self.cache_credential(access_key_id, &credential).await;
|
||||||
|
Ok(credential)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn cached_credential(&self, access_key_id: &str) -> Option<ResolvedCredential> {
|
||||||
|
let cache = self.credential_cache.read().await;
|
||||||
|
cache.get(access_key_id).and_then(|entry| {
|
||||||
|
if entry.cached_at.elapsed() <= self.cache_ttl {
|
||||||
|
Some(entry.credential.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn cache_credential(&self, access_key_id: &str, credential: &ResolvedCredential) {
|
||||||
|
let mut cache = self.credential_cache.write().await;
|
||||||
|
cache.insert(
|
||||||
|
access_key_id.to_string(),
|
||||||
|
CachedCredential {
|
||||||
|
credential: credential.clone(),
|
||||||
|
cached_at: Instant::now(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn grpc_channel(
|
||||||
|
endpoint: &str,
|
||||||
|
channel: &Arc<Mutex<Option<Channel>>>,
|
||||||
|
) -> Result<Channel, String> {
|
||||||
|
let mut cached = channel.lock().await;
|
||||||
|
if let Some(existing) = cached.as_ref() {
|
||||||
|
return Ok(existing.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let created = Channel::from_shared(endpoint.to_string())
|
||||||
|
.map_err(|e| format!("failed to parse IAM credential endpoint: {}", e))?
|
||||||
|
.connect()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("failed to connect to IAM credential service: {}", e))?;
|
||||||
|
*cached = Some(created.clone());
|
||||||
|
Ok(created)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn invalidate_grpc_channel(channel: &Arc<Mutex<Option<Channel>>>) {
|
||||||
|
let mut cached = channel.lock().await;
|
||||||
|
*cached = None;
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn grpc_get_secret_key(
|
||||||
|
&self,
|
||||||
|
endpoint: &str,
|
||||||
|
channel: &Arc<Mutex<Option<Channel>>>,
|
||||||
|
access_key_id: &str,
|
||||||
|
) -> Result<tonic::Response<iam_api::proto::GetSecretKeyResponse>, String> {
|
||||||
|
for attempt in 0..2 {
|
||||||
|
let grpc_channel = Self::grpc_channel(endpoint, channel).await?;
|
||||||
|
let mut client = IamCredentialClient::new(grpc_channel);
|
||||||
|
match client
|
||||||
|
.get_secret_key(GetSecretKeyRequest {
|
||||||
|
access_key_id: access_key_id.to_string(),
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(response) => return Ok(response),
|
||||||
|
Err(status)
|
||||||
|
if attempt == 0
|
||||||
|
&& matches!(
|
||||||
|
status.code(),
|
||||||
|
tonic::Code::Unavailable
|
||||||
|
| tonic::Code::Cancelled
|
||||||
|
| tonic::Code::Unknown
|
||||||
|
| tonic::Code::DeadlineExceeded
|
||||||
|
| tonic::Code::Internal
|
||||||
|
) =>
|
||||||
|
{
|
||||||
|
Self::invalidate_grpc_channel(channel).await;
|
||||||
|
}
|
||||||
|
Err(status) => return Err(status.message().to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err("IAM credential lookup exhausted retries".to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn normalize_iam_endpoint(endpoint: &str) -> String {
|
||||||
|
if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
|
||||||
|
endpoint.to_string()
|
||||||
|
} else {
|
||||||
|
format!("http://{}", endpoint)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AuthState {
|
impl AuthState {
|
||||||
/// Create new auth state with IAM integration
|
/// Create new auth state with IAM integration
|
||||||
pub fn new(iam_endpoint: Option<String>) -> Self {
|
pub fn new(iam_endpoint: Option<String>) -> Self {
|
||||||
let iam_client = if let Some(_endpoint) = iam_endpoint {
|
let iam_client = Some(Arc::new(RwLock::new(IamClient::new(iam_endpoint))));
|
||||||
// TODO: Connect to real IAM gRPC service
|
|
||||||
// For now, if an endpoint is provided, we still use our env var based client
|
|
||||||
Some(Arc::new(RwLock::new(IamClient::new())))
|
|
||||||
} else {
|
|
||||||
Some(Arc::new(RwLock::new(IamClient::new())))
|
|
||||||
};
|
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
iam_client,
|
iam_client,
|
||||||
|
|
@ -198,9 +393,9 @@ pub async fn sigv4_auth_middleware(
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get secret key from IAM (or use dummy for MVP)
|
// Get secret key from IAM (or use dummy for MVP)
|
||||||
let secret_key = if let Some(ref iam) = auth_state.iam_client {
|
let credential = if let Some(ref iam) = auth_state.iam_client {
|
||||||
match iam.read().await.get_secret_key(&access_key_id).await {
|
match iam.read().await.get_credential(&access_key_id).await {
|
||||||
Ok(key) => key,
|
Ok(credential) => credential,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("IAM credential validation failed: {}", e);
|
warn!("IAM credential validation failed: {}", e);
|
||||||
return error_response(
|
return error_response(
|
||||||
|
|
@ -211,18 +406,22 @@ pub async fn sigv4_auth_middleware(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// This case should ideally not be hit with the current IamClient::new() logic
|
|
||||||
// but kept for safety.
|
|
||||||
debug!("No IAM integration, using dummy secret key if IamClient wasn't initialized.");
|
debug!("No IAM integration, using dummy secret key if IamClient wasn't initialized.");
|
||||||
"dummy_secret_key_for_mvp".to_string()
|
ResolvedCredential {
|
||||||
|
secret_key: "dummy_secret_key_for_mvp".to_string(),
|
||||||
|
principal_id: access_key_id.clone(),
|
||||||
|
org_id: Some("default".to_string()),
|
||||||
|
project_id: Some("default".to_string()),
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
let secret_key = credential.secret_key.as_str();
|
||||||
|
|
||||||
let payload_hash_header = headers
|
let payload_hash_header = headers
|
||||||
.get("x-amz-content-sha256")
|
.get("x-amz-content-sha256")
|
||||||
.and_then(|value| value.to_str().ok())
|
.and_then(|value| value.to_str().ok())
|
||||||
.filter(|value| !value.is_empty())
|
.filter(|value| !value.is_empty())
|
||||||
.map(str::to_string);
|
.map(str::to_string);
|
||||||
let should_buffer_body = !matches!(payload_hash_header.as_deref(), Some(hash) if hash != "UNSIGNED-PAYLOAD");
|
let should_buffer_body = should_buffer_auth_body(payload_hash_header.as_deref());
|
||||||
|
|
||||||
let body_bytes = if should_buffer_body {
|
let body_bytes = if should_buffer_body {
|
||||||
let max_body_bytes = std::env::var("S3_MAX_AUTH_BODY_BYTES")
|
let max_body_bytes = std::env::var("S3_MAX_AUTH_BODY_BYTES")
|
||||||
|
|
@ -282,7 +481,7 @@ pub async fn sigv4_auth_middleware(
|
||||||
);
|
);
|
||||||
|
|
||||||
let expected_signature = match compute_sigv4_signature(
|
let expected_signature = match compute_sigv4_signature(
|
||||||
&secret_key,
|
secret_key,
|
||||||
&method,
|
&method,
|
||||||
&uri,
|
&uri,
|
||||||
&headers,
|
&headers,
|
||||||
|
|
@ -310,6 +509,21 @@ pub async fn sigv4_auth_middleware(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
match (credential.org_id, credential.project_id) {
|
||||||
|
(Some(org_id), Some(project_id)) => {
|
||||||
|
request
|
||||||
|
.extensions_mut()
|
||||||
|
.insert(VerifiedTenantContext(TenantContext { org_id, project_id }));
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return error_response(
|
||||||
|
StatusCode::FORBIDDEN,
|
||||||
|
"AccessDenied",
|
||||||
|
"S3 credential is missing tenant scope",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Auth successful
|
// Auth successful
|
||||||
debug!("SigV4 auth successful for access_key={}", access_key_id);
|
debug!("SigV4 auth successful for access_key={}", access_key_id);
|
||||||
next.run(request).await
|
next.run(request).await
|
||||||
|
|
@ -558,6 +772,97 @@ fn error_response(status: StatusCode, code: &str, message: &str) -> Response {
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use axum::http::HeaderValue;
|
use axum::http::HeaderValue;
|
||||||
|
use iam_api::proto::{
|
||||||
|
iam_credential_server::{IamCredential, IamCredentialServer},
|
||||||
|
CreateS3CredentialRequest, CreateS3CredentialResponse, Credential, GetSecretKeyResponse,
|
||||||
|
ListCredentialsRequest, ListCredentialsResponse, RevokeCredentialRequest,
|
||||||
|
RevokeCredentialResponse,
|
||||||
|
};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
use std::sync::{atomic::{AtomicUsize, Ordering}, Mutex};
|
||||||
|
use tokio::net::TcpListener;
|
||||||
|
use tokio::time::{sleep, Duration};
|
||||||
|
use tonic::{Request as TonicRequest, Response as TonicResponse, Status};
|
||||||
|
use tonic::transport::Server;
|
||||||
|
|
||||||
|
static ENV_LOCK: Mutex<()> = Mutex::new(());
|
||||||
|
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
struct MockIamCredentialService {
|
||||||
|
secrets: Arc<HashMap<String, String>>,
|
||||||
|
get_secret_calls: Arc<AtomicUsize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tonic::async_trait]
|
||||||
|
impl IamCredential for MockIamCredentialService {
|
||||||
|
async fn create_s3_credential(
|
||||||
|
&self,
|
||||||
|
_request: TonicRequest<CreateS3CredentialRequest>,
|
||||||
|
) -> Result<TonicResponse<CreateS3CredentialResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("not needed in test"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_secret_key(
|
||||||
|
&self,
|
||||||
|
request: TonicRequest<GetSecretKeyRequest>,
|
||||||
|
) -> Result<TonicResponse<GetSecretKeyResponse>, Status> {
|
||||||
|
let access_key_id = request.into_inner().access_key_id;
|
||||||
|
self.get_secret_calls.fetch_add(1, Ordering::SeqCst);
|
||||||
|
let Some(secret_key) = self.secrets.get(&access_key_id) else {
|
||||||
|
return Err(Status::not_found("access key not found"));
|
||||||
|
};
|
||||||
|
Ok(TonicResponse::new(GetSecretKeyResponse {
|
||||||
|
secret_key: secret_key.clone(),
|
||||||
|
principal_id: "test-principal".to_string(),
|
||||||
|
expires_at: None,
|
||||||
|
org_id: Some("test-org".to_string()),
|
||||||
|
project_id: Some("test-project".to_string()),
|
||||||
|
principal_kind: iam_api::proto::PrincipalKind::ServiceAccount as i32,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_credentials(
|
||||||
|
&self,
|
||||||
|
_request: TonicRequest<ListCredentialsRequest>,
|
||||||
|
) -> Result<TonicResponse<ListCredentialsResponse>, Status> {
|
||||||
|
Ok(TonicResponse::new(ListCredentialsResponse {
|
||||||
|
credentials: Vec::<Credential>::new(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn revoke_credential(
|
||||||
|
&self,
|
||||||
|
_request: TonicRequest<RevokeCredentialRequest>,
|
||||||
|
) -> Result<TonicResponse<RevokeCredentialResponse>, Status> {
|
||||||
|
Ok(TonicResponse::new(RevokeCredentialResponse { success: true }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn start_mock_iam(secrets: HashMap<String, String>) -> (SocketAddr, Arc<AtomicUsize>) {
|
||||||
|
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||||
|
let addr = listener.local_addr().unwrap();
|
||||||
|
let get_secret_calls = Arc::new(AtomicUsize::new(0));
|
||||||
|
let service = MockIamCredentialService {
|
||||||
|
secrets: Arc::new(secrets),
|
||||||
|
get_secret_calls: get_secret_calls.clone(),
|
||||||
|
};
|
||||||
|
drop(listener);
|
||||||
|
tokio::spawn(async move {
|
||||||
|
Server::builder()
|
||||||
|
.add_service(IamCredentialServer::new(service))
|
||||||
|
.serve(addr)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
});
|
||||||
|
for _ in 0..20 {
|
||||||
|
if tokio::net::TcpStream::connect(addr).await.is_ok() {
|
||||||
|
return (addr, get_secret_calls);
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(25)).await;
|
||||||
|
}
|
||||||
|
panic!("mock IAM server did not start on {}", addr);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_parse_auth_header() {
|
async fn test_parse_auth_header() {
|
||||||
|
|
@ -657,6 +962,13 @@ mod tests {
|
||||||
assert_eq!(hashed_payload, "signed-payload-hash");
|
assert_eq!(hashed_payload, "signed-payload-hash");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_should_buffer_auth_body_only_when_hash_header_missing() {
|
||||||
|
assert!(should_buffer_auth_body(None));
|
||||||
|
assert!(!should_buffer_auth_body(Some("signed-payload-hash")));
|
||||||
|
assert!(!should_buffer_auth_body(Some("UNSIGNED-PAYLOAD")));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_build_string_to_sign() {
|
fn test_build_string_to_sign() {
|
||||||
let amz_date = "20231201T000000Z";
|
let amz_date = "20231201T000000Z";
|
||||||
|
|
@ -677,34 +989,77 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_iam_client_multi_credentials() {
|
fn test_iam_client_multi_credentials() {
|
||||||
|
let _guard = ENV_LOCK.lock().unwrap();
|
||||||
// Test parsing S3_CREDENTIALS format
|
// Test parsing S3_CREDENTIALS format
|
||||||
std::env::set_var("S3_CREDENTIALS", "key1:secret1,key2:secret2,key3:secret3");
|
std::env::set_var("S3_CREDENTIALS", "key1:secret1,key2:secret2,key3:secret3");
|
||||||
let client = IamClient::new();
|
let client = IamClient::new(None);
|
||||||
|
let credentials = client.env_credentials().unwrap();
|
||||||
|
|
||||||
assert_eq!(client.credentials.len(), 3);
|
assert_eq!(credentials.len(), 3);
|
||||||
assert_eq!(client.credentials.get("key1"), Some(&"secret1".to_string()));
|
assert_eq!(credentials.get("key1"), Some(&"secret1".to_string()));
|
||||||
assert_eq!(client.credentials.get("key2"), Some(&"secret2".to_string()));
|
assert_eq!(credentials.get("key2"), Some(&"secret2".to_string()));
|
||||||
assert_eq!(client.credentials.get("key3"), Some(&"secret3".to_string()));
|
assert_eq!(credentials.get("key3"), Some(&"secret3".to_string()));
|
||||||
|
|
||||||
std::env::remove_var("S3_CREDENTIALS");
|
std::env::remove_var("S3_CREDENTIALS");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_iam_client_single_credentials() {
|
fn test_iam_client_single_credentials() {
|
||||||
|
let _guard = ENV_LOCK.lock().unwrap();
|
||||||
// Test legacy S3_ACCESS_KEY_ID/S3_SECRET_KEY format
|
// Test legacy S3_ACCESS_KEY_ID/S3_SECRET_KEY format
|
||||||
std::env::remove_var("S3_CREDENTIALS");
|
std::env::remove_var("S3_CREDENTIALS");
|
||||||
std::env::set_var("S3_ACCESS_KEY_ID", "test_key");
|
std::env::set_var("S3_ACCESS_KEY_ID", "test_key");
|
||||||
std::env::set_var("S3_SECRET_KEY", "test_secret");
|
std::env::set_var("S3_SECRET_KEY", "test_secret");
|
||||||
|
|
||||||
let client = IamClient::new();
|
let client = IamClient::new(None);
|
||||||
|
let credentials = client.env_credentials().unwrap();
|
||||||
|
|
||||||
assert_eq!(client.credentials.len(), 1);
|
assert_eq!(credentials.len(), 1);
|
||||||
assert_eq!(client.credentials.get("test_key"), Some(&"test_secret".to_string()));
|
assert_eq!(credentials.get("test_key"), Some(&"test_secret".to_string()));
|
||||||
|
|
||||||
std::env::remove_var("S3_ACCESS_KEY_ID");
|
std::env::remove_var("S3_ACCESS_KEY_ID");
|
||||||
std::env::remove_var("S3_SECRET_KEY");
|
std::env::remove_var("S3_SECRET_KEY");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_iam_client_grpc_lookup() {
|
||||||
|
let (addr, _calls) = start_mock_iam(HashMap::from([(
|
||||||
|
"grpc_key".to_string(),
|
||||||
|
"grpc_secret".to_string(),
|
||||||
|
)]))
|
||||||
|
.await;
|
||||||
|
let client = IamClient::new(Some(addr.to_string()));
|
||||||
|
|
||||||
|
let credential = client.get_credential("grpc_key").await.unwrap();
|
||||||
|
assert_eq!(credential.secret_key, "grpc_secret");
|
||||||
|
assert_eq!(credential.org_id.as_deref(), Some("test-org"));
|
||||||
|
assert_eq!(credential.project_id.as_deref(), Some("test-project"));
|
||||||
|
assert_eq!(
|
||||||
|
client.get_credential("missing").await.unwrap_err(),
|
||||||
|
"access key not found"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_iam_client_grpc_cache_reuses_secret() {
|
||||||
|
let (addr, calls) = start_mock_iam(HashMap::from([(
|
||||||
|
"grpc_key".to_string(),
|
||||||
|
"grpc_secret".to_string(),
|
||||||
|
)]))
|
||||||
|
.await;
|
||||||
|
let client = IamClient::new(Some(addr.to_string()));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
client.get_credential("grpc_key").await.unwrap().secret_key,
|
||||||
|
"grpc_secret"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
client.get_credential("grpc_key").await.unwrap().secret_key,
|
||||||
|
"grpc_secret"
|
||||||
|
);
|
||||||
|
assert_eq!(calls.load(Ordering::SeqCst), 1);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_complete_sigv4_signature() {
|
fn test_complete_sigv4_signature() {
|
||||||
// Test with AWS example credentials (from AWS docs)
|
// Test with AWS example credentials (from AWS docs)
|
||||||
|
|
@ -1039,18 +1394,20 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_security_credential_lookup_unknown_key() {
|
fn test_security_credential_lookup_unknown_key() {
|
||||||
|
let _guard = ENV_LOCK.lock().unwrap();
|
||||||
// Test that unknown access keys return the correct result
|
// Test that unknown access keys return the correct result
|
||||||
std::env::remove_var("S3_CREDENTIALS");
|
std::env::remove_var("S3_CREDENTIALS");
|
||||||
std::env::set_var("S3_ACCESS_KEY_ID", "known_key");
|
std::env::set_var("S3_ACCESS_KEY_ID", "known_key");
|
||||||
std::env::set_var("S3_SECRET_KEY", "known_secret");
|
std::env::set_var("S3_SECRET_KEY", "known_secret");
|
||||||
|
|
||||||
let client = IamClient::new();
|
let client = IamClient::new(None);
|
||||||
|
let credentials = client.env_credentials().unwrap();
|
||||||
|
|
||||||
// Known key should be found in credentials map
|
// Known key should be found in credentials map
|
||||||
assert_eq!(client.credentials.get("known_key"), Some(&"known_secret".to_string()));
|
assert_eq!(credentials.get("known_key"), Some(&"known_secret".to_string()));
|
||||||
|
|
||||||
// Unknown key should not be found
|
// Unknown key should not be found
|
||||||
assert_eq!(client.credentials.get("unknown_key"), None);
|
assert_eq!(credentials.get("unknown_key"), None);
|
||||||
|
|
||||||
std::env::remove_var("S3_ACCESS_KEY_ID");
|
std::env::remove_var("S3_ACCESS_KEY_ID");
|
||||||
std::env::remove_var("S3_SECRET_KEY");
|
std::env::remove_var("S3_SECRET_KEY");
|
||||||
|
|
@ -1058,33 +1415,36 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_security_empty_credentials() {
|
fn test_security_empty_credentials() {
|
||||||
|
let _guard = ENV_LOCK.lock().unwrap();
|
||||||
// Test that IamClient keeps credentials empty when none provided
|
// Test that IamClient keeps credentials empty when none provided
|
||||||
std::env::remove_var("S3_CREDENTIALS");
|
std::env::remove_var("S3_CREDENTIALS");
|
||||||
std::env::remove_var("S3_ACCESS_KEY_ID");
|
std::env::remove_var("S3_ACCESS_KEY_ID");
|
||||||
std::env::remove_var("S3_SECRET_KEY");
|
std::env::remove_var("S3_SECRET_KEY");
|
||||||
|
|
||||||
let client = IamClient::new();
|
let client = IamClient::new(None);
|
||||||
|
|
||||||
// No credentials configured
|
// No credentials configured
|
||||||
assert!(client.credentials.is_empty());
|
assert!(client.env_credentials().unwrap().is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_security_malformed_s3_credentials_env() {
|
fn test_security_malformed_s3_credentials_env() {
|
||||||
|
let _guard = ENV_LOCK.lock().unwrap();
|
||||||
// Test that malformed S3_CREDENTIALS are handled gracefully
|
// Test that malformed S3_CREDENTIALS are handled gracefully
|
||||||
|
|
||||||
// Missing colon separator
|
// Missing colon separator
|
||||||
std::env::set_var("S3_CREDENTIALS", "key1_secret1,key2:secret2");
|
std::env::set_var("S3_CREDENTIALS", "key1_secret1,key2:secret2");
|
||||||
let client = IamClient::new();
|
let client = IamClient::new(None);
|
||||||
|
let credentials = client.env_credentials().unwrap();
|
||||||
// Should only parse the valid pair (key2:secret2)
|
// Should only parse the valid pair (key2:secret2)
|
||||||
assert_eq!(client.credentials.len(), 1);
|
assert_eq!(credentials.len(), 1);
|
||||||
assert!(client.credentials.contains_key("key2"));
|
assert!(credentials.contains_key("key2"));
|
||||||
|
|
||||||
// Empty pairs
|
// Empty pairs
|
||||||
std::env::set_var("S3_CREDENTIALS", "key1:secret1,,key2:secret2");
|
std::env::set_var("S3_CREDENTIALS", "key1:secret1,,key2:secret2");
|
||||||
let client2 = IamClient::new();
|
let client2 = IamClient::new(None);
|
||||||
// Should parse both valid pairs, skip empty
|
// Should parse both valid pairs, skip empty
|
||||||
assert_eq!(client2.credentials.len(), 2);
|
assert_eq!(client2.env_credentials().unwrap().len(), 2);
|
||||||
|
|
||||||
std::env::remove_var("S3_CREDENTIALS");
|
std::env::remove_var("S3_CREDENTIALS");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,4 +7,4 @@ mod router;
|
||||||
mod xml;
|
mod xml;
|
||||||
|
|
||||||
pub use auth::{AuthState, sigv4_auth_middleware};
|
pub use auth::{AuthState, sigv4_auth_middleware};
|
||||||
pub use router::{create_router, create_router_with_state};
|
pub use router::{create_router, create_router_with_auth, create_router_with_state};
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -66,6 +66,9 @@ pub struct ListBucketResult {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
#[serde(rename = "Prefix")]
|
#[serde(rename = "Prefix")]
|
||||||
pub prefix: String,
|
pub prefix: String,
|
||||||
|
#[serde(rename = "Marker")]
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub marker: Option<String>,
|
||||||
#[serde(rename = "Delimiter")]
|
#[serde(rename = "Delimiter")]
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub delimiter: Option<String>,
|
pub delimiter: Option<String>,
|
||||||
|
|
@ -73,6 +76,9 @@ pub struct ListBucketResult {
|
||||||
pub max_keys: u32,
|
pub max_keys: u32,
|
||||||
#[serde(rename = "IsTruncated")]
|
#[serde(rename = "IsTruncated")]
|
||||||
pub is_truncated: bool,
|
pub is_truncated: bool,
|
||||||
|
#[serde(rename = "NextMarker")]
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub next_marker: Option<String>,
|
||||||
#[serde(rename = "Contents", default)]
|
#[serde(rename = "Contents", default)]
|
||||||
pub contents: Vec<ObjectEntry>,
|
pub contents: Vec<ObjectEntry>,
|
||||||
#[serde(rename = "CommonPrefixes", default)]
|
#[serde(rename = "CommonPrefixes", default)]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
use tonic::{metadata::MetadataMap, Status};
|
use tonic::{metadata::MetadataMap, Status};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct TenantContext {
|
pub struct TenantContext {
|
||||||
pub org_id: String,
|
pub org_id: String,
|
||||||
pub project_id: String,
|
pub project_id: String,
|
||||||
|
|
|
||||||
146
nix/ci/flake.lock
generated
146
nix/ci/flake.lock
generated
|
|
@ -1,5 +1,26 @@
|
||||||
{
|
{
|
||||||
"nodes": {
|
"nodes": {
|
||||||
|
"disko": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": [
|
||||||
|
"photoncloud",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1765326679,
|
||||||
|
"narHash": "sha256-fTLX9kDwLr9Y0rH/nG+h1XG5UU+jBcy0PFYn5eneRX8=",
|
||||||
|
"owner": "nix-community",
|
||||||
|
"repo": "disko",
|
||||||
|
"rev": "d64e5cdca35b5fad7c504f615357a7afe6d9c49e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-community",
|
||||||
|
"repo": "disko",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
"flake-utils": {
|
"flake-utils": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"systems": "systems"
|
"systems": "systems"
|
||||||
|
|
@ -18,6 +39,43 @@
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"flake-utils_2": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems_2"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nix-nos": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": [
|
||||||
|
"photoncloud",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"path": "./nix-nos",
|
||||||
|
"type": "path"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"path": "./nix-nos",
|
||||||
|
"type": "path"
|
||||||
|
},
|
||||||
|
"parent": [
|
||||||
|
"photoncloud"
|
||||||
|
]
|
||||||
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1765186076,
|
"lastModified": 1765186076,
|
||||||
|
|
@ -34,14 +92,71 @@
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"nixpkgs_2": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1765186076,
|
||||||
|
"narHash": "sha256-hM20uyap1a0M9d344I692r+ik4gTMyj60cQWO+hAYP8=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "addf7cf5f383a3101ecfba091b98d0a1263dc9b8",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"photoncloud": {
|
||||||
|
"inputs": {
|
||||||
|
"disko": "disko",
|
||||||
|
"flake-utils": "flake-utils_2",
|
||||||
|
"nix-nos": "nix-nos",
|
||||||
|
"nixpkgs": "nixpkgs_2",
|
||||||
|
"rust-overlay": "rust-overlay",
|
||||||
|
"systems": "systems_3"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"path": "../..",
|
||||||
|
"type": "path"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"path": "../..",
|
||||||
|
"type": "path"
|
||||||
|
},
|
||||||
|
"parent": []
|
||||||
|
},
|
||||||
"root": {
|
"root": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-utils": "flake-utils",
|
"flake-utils": "flake-utils",
|
||||||
"nixpkgs": "nixpkgs",
|
"nixpkgs": "nixpkgs",
|
||||||
"rust-overlay": "rust-overlay"
|
"photoncloud": "photoncloud",
|
||||||
|
"rust-overlay": "rust-overlay_2"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"rust-overlay": {
|
"rust-overlay": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": [
|
||||||
|
"photoncloud",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1765465581,
|
||||||
|
"narHash": "sha256-fCXT0aZXmTalM3NPCTedVs9xb0egBG5BOZkcrYo5PGE=",
|
||||||
|
"owner": "oxalica",
|
||||||
|
"repo": "rust-overlay",
|
||||||
|
"rev": "99cc5667eece98bb35dcf35f7e511031a8b7a125",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "oxalica",
|
||||||
|
"repo": "rust-overlay",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"rust-overlay_2": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"nixpkgs": [
|
"nixpkgs": [
|
||||||
"nixpkgs"
|
"nixpkgs"
|
||||||
|
|
@ -75,6 +190,35 @@
|
||||||
"repo": "default",
|
"repo": "default",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"systems_2": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems_3": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"id": "systems",
|
||||||
|
"type": "indirect"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": "root",
|
"root": "root",
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
|
|
||||||
flake-utils.url = "github:numtide/flake-utils";
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
|
photoncloud.url = "path:../..";
|
||||||
|
|
||||||
rust-overlay = {
|
rust-overlay = {
|
||||||
url = "github:oxalica/rust-overlay";
|
url = "github:oxalica/rust-overlay";
|
||||||
|
|
@ -12,7 +13,7 @@
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
outputs = { self, nixpkgs, flake-utils, rust-overlay }:
|
outputs = { self, nixpkgs, flake-utils, photoncloud, rust-overlay }:
|
||||||
flake-utils.lib.eachDefaultSystem (system:
|
flake-utils.lib.eachDefaultSystem (system:
|
||||||
let
|
let
|
||||||
overlays = [ (import rust-overlay) ];
|
overlays = [ (import rust-overlay) ];
|
||||||
|
|
@ -201,7 +202,7 @@
|
||||||
|
|
||||||
if [[ "$no_logs" == "0" ]]; then
|
if [[ "$no_logs" == "0" ]]; then
|
||||||
local out
|
local out
|
||||||
out="$logdir/shared_${crate}.$(echo "$title" | tr '[:upper:]' '[:lower:]' | tr ' ' '_' | tr -cd 'a-z0-9_').log"
|
out="$logdir/shared_''${crate}.$(echo "$title" | tr '[:upper:]' '[:lower:]' | tr ' ' '_' | tr -cd 'a-z0-9_').log"
|
||||||
(cd "$repo_root" && bash -c "$cmd") 2>&1 | tee "$out"
|
(cd "$repo_root" && bash -c "$cmd") 2>&1 | tee "$out"
|
||||||
else
|
else
|
||||||
(cd "$repo_root" && bash -c "$cmd")
|
(cd "$repo_root" && bash -c "$cmd")
|
||||||
|
|
@ -291,6 +292,11 @@
|
||||||
${gate}/bin/photoncloud-gate --tier 0 --no-logs
|
${gate}/bin/photoncloud-gate --tier 0 --no-logs
|
||||||
touch $out/ok
|
touch $out/ok
|
||||||
'';
|
'';
|
||||||
|
checks.deployer-vm-smoke = photoncloud.checks.${system}.deployer-vm-smoke;
|
||||||
|
checks.deployer-vm-rollback = photoncloud.checks.${system}.deployer-vm-rollback;
|
||||||
|
checks.deployer-bootstrap-e2e = photoncloud.checks.${system}.deployer-bootstrap-e2e;
|
||||||
|
checks.host-lifecycle-e2e = photoncloud.checks.${system}.host-lifecycle-e2e;
|
||||||
|
checks.fleet-scheduler-e2e = photoncloud.checks.${system}.fleet-scheduler-e2e;
|
||||||
|
|
||||||
devShells.default = pkgs.mkShell {
|
devShells.default = pkgs.mkShell {
|
||||||
name = "photoncloud-ci-dev";
|
name = "photoncloud-ci-dev";
|
||||||
|
|
|
||||||
67
nix/images/deployer-vm-smoke-target.nix
Normal file
67
nix/images/deployer-vm-smoke-target.nix
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
{ lib, modulesPath, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
"${modulesPath}/virtualisation/qemu-vm.nix"
|
||||||
|
"${modulesPath}/testing/test-instrumentation.nix"
|
||||||
|
];
|
||||||
|
|
||||||
|
boot.loader.grub = {
|
||||||
|
enable = true;
|
||||||
|
device = "/dev/vda";
|
||||||
|
forceInstall = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
fileSystems."/" = {
|
||||||
|
device = "/dev/disk/by-label/nixos";
|
||||||
|
fsType = "ext4";
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.hostName = "worker";
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
networking.useDHCP = lib.mkForce false;
|
||||||
|
networking.dhcpcd.enable = lib.mkForce false;
|
||||||
|
systemd.network = {
|
||||||
|
enable = true;
|
||||||
|
networks."10-eth0" = {
|
||||||
|
matchConfig.Name = "eth0";
|
||||||
|
networkConfig.DHCP = "yes";
|
||||||
|
linkConfig.RequiredForOnline = "routable";
|
||||||
|
};
|
||||||
|
networks."20-eth1" = {
|
||||||
|
matchConfig.Name = "eth1";
|
||||||
|
address = [ "192.168.1.2/24" ];
|
||||||
|
linkConfig.RequiredForOnline = "routable";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
nix.registry = lib.mkForce { };
|
||||||
|
nix.nixPath = lib.mkForce [ ];
|
||||||
|
nix.channel.enable = false;
|
||||||
|
nix.settings = {
|
||||||
|
experimental-features = [
|
||||||
|
"nix-command"
|
||||||
|
"flakes"
|
||||||
|
];
|
||||||
|
flake-registry = "";
|
||||||
|
};
|
||||||
|
nixpkgs.flake = {
|
||||||
|
source = lib.mkForce null;
|
||||||
|
setFlakeRegistry = lib.mkForce false;
|
||||||
|
setNixPath = lib.mkForce false;
|
||||||
|
};
|
||||||
|
|
||||||
|
system.switch.enable = lib.mkForce true;
|
||||||
|
system.nixos.label = lib.mkForce "vm-smoke-target";
|
||||||
|
system.nixos.version = lib.mkForce "vm-smoke-target";
|
||||||
|
system.nixos.versionSuffix = lib.mkForce "-vm-smoke-target";
|
||||||
|
environment.etc."photon-vm-smoke-target".text = "vm-smoke-target\n";
|
||||||
|
|
||||||
|
documentation.enable = false;
|
||||||
|
documentation.nixos.enable = false;
|
||||||
|
documentation.man.enable = false;
|
||||||
|
documentation.info.enable = false;
|
||||||
|
documentation.doc.enable = false;
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
}
|
||||||
|
|
@ -33,6 +33,12 @@ let
|
||||||
|
|
||||||
mkDesiredSystemType = types: types.submodule {
|
mkDesiredSystemType = types: types.submodule {
|
||||||
options = {
|
options = {
|
||||||
|
deploymentId = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Optional host deployment identifier owning this desired system";
|
||||||
|
};
|
||||||
|
|
||||||
nixosConfiguration = mkOption {
|
nixosConfiguration = mkOption {
|
||||||
type = types.nullOr types.str;
|
type = types.nullOr types.str;
|
||||||
default = null;
|
default = null;
|
||||||
|
|
@ -62,9 +68,122 @@ let
|
||||||
default = null;
|
default = null;
|
||||||
description = "Whether nix-agent should roll back when the health check fails";
|
description = "Whether nix-agent should roll back when the health check fails";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
drainBeforeApply = mkOption {
|
||||||
|
type = types.nullOr types.bool;
|
||||||
|
default = null;
|
||||||
|
description = "Whether the controller should drain the node before issuing this desired system";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
mkHostDeploymentSelectorType = types: types.submodule {
|
||||||
|
options = {
|
||||||
|
nodeIds = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Explicit node IDs targeted by the deployment";
|
||||||
|
};
|
||||||
|
|
||||||
|
roles = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Node roles targeted by the deployment";
|
||||||
|
};
|
||||||
|
|
||||||
|
pools = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Node pools targeted by the deployment";
|
||||||
|
};
|
||||||
|
|
||||||
|
nodeClasses = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Node classes targeted by the deployment";
|
||||||
|
};
|
||||||
|
|
||||||
|
matchLabels = mkOption {
|
||||||
|
type = types.attrsOf types.str;
|
||||||
|
default = { };
|
||||||
|
description = "Label selectors applied to target nodes";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
mkHostDeploymentType = types:
|
||||||
|
let
|
||||||
|
selectorType = mkHostDeploymentSelectorType types;
|
||||||
|
in types.submodule {
|
||||||
|
options = {
|
||||||
|
selector = mkOption {
|
||||||
|
type = selectorType;
|
||||||
|
default = { };
|
||||||
|
description = "Node selector used by the host deployment";
|
||||||
|
};
|
||||||
|
|
||||||
|
nixosConfiguration = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Name of the nixosConfigurations output to roll out";
|
||||||
|
};
|
||||||
|
|
||||||
|
flakeRef = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Explicit flake reference used during rollout";
|
||||||
|
};
|
||||||
|
|
||||||
|
batchSize = mkOption {
|
||||||
|
type = types.nullOr types.int;
|
||||||
|
default = null;
|
||||||
|
description = "Maximum number of nodes started per reconciliation wave";
|
||||||
|
};
|
||||||
|
|
||||||
|
maxUnavailable = mkOption {
|
||||||
|
type = types.nullOr types.int;
|
||||||
|
default = null;
|
||||||
|
description = "Maximum number of unavailable nodes allowed during rollout";
|
||||||
|
};
|
||||||
|
|
||||||
|
healthCheckCommand = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Health check command executed by nix-agent after activation";
|
||||||
|
};
|
||||||
|
|
||||||
|
switchAction = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "switch-to-configuration action used by nix-agent";
|
||||||
|
};
|
||||||
|
|
||||||
|
rollbackOnFailure = mkOption {
|
||||||
|
type = types.nullOr types.bool;
|
||||||
|
default = null;
|
||||||
|
description = "Whether nodes should roll back when rollout health checks fail";
|
||||||
|
};
|
||||||
|
|
||||||
|
drainBeforeApply = mkOption {
|
||||||
|
type = types.nullOr types.bool;
|
||||||
|
default = null;
|
||||||
|
description = "Whether the controller should drain a node before applying the rollout";
|
||||||
|
};
|
||||||
|
|
||||||
|
rebootPolicy = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Operator-facing reboot policy associated with the rollout";
|
||||||
|
};
|
||||||
|
|
||||||
|
paused = mkOption {
|
||||||
|
type = types.nullOr types.bool;
|
||||||
|
default = null;
|
||||||
|
description = "Whether the rollout should start in a paused state";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
mkNodeType = types:
|
mkNodeType = types:
|
||||||
let
|
let
|
||||||
installPlanType = mkInstallPlanType types;
|
installPlanType = mkInstallPlanType types;
|
||||||
|
|
@ -159,6 +278,30 @@ let
|
||||||
default = null;
|
default = null;
|
||||||
description = "Desired deployer node lifecycle state";
|
description = "Desired deployer node lifecycle state";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
commissionState = mkOption {
|
||||||
|
type = types.nullOr (types.enum [ "discovered" "commissioning" "commissioned" ]);
|
||||||
|
default = null;
|
||||||
|
description = "Optional commissioning state exported into deployer cluster state";
|
||||||
|
};
|
||||||
|
|
||||||
|
installState = mkOption {
|
||||||
|
type = types.nullOr (types.enum [ "pending" "installing" "installed" "failed" "reinstall_requested" ]);
|
||||||
|
default = null;
|
||||||
|
description = "Optional install lifecycle state exported into deployer cluster state";
|
||||||
|
};
|
||||||
|
|
||||||
|
powerState = mkOption {
|
||||||
|
type = types.nullOr (types.enum [ "on" "off" "cycling" "unknown" ]);
|
||||||
|
default = null;
|
||||||
|
description = "Optional external power-management state associated with the node";
|
||||||
|
};
|
||||||
|
|
||||||
|
bmcRef = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Optional BMC / Redfish reference associated with the node";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -339,7 +482,10 @@ let
|
||||||
mkDesiredSystem = nodeName: desiredSystem:
|
mkDesiredSystem = nodeName: desiredSystem:
|
||||||
let
|
let
|
||||||
rendered =
|
rendered =
|
||||||
optionalAttrs (desiredSystem != null && desiredSystem.nixosConfiguration != null) {
|
optionalAttrs (desiredSystem != null && desiredSystem.deploymentId != null) {
|
||||||
|
deployment_id = desiredSystem.deploymentId;
|
||||||
|
}
|
||||||
|
// optionalAttrs (desiredSystem != null && desiredSystem.nixosConfiguration != null) {
|
||||||
nixos_configuration = desiredSystem.nixosConfiguration;
|
nixos_configuration = desiredSystem.nixosConfiguration;
|
||||||
}
|
}
|
||||||
// optionalAttrs (desiredSystem != null && desiredSystem.flakeRef != null) {
|
// optionalAttrs (desiredSystem != null && desiredSystem.flakeRef != null) {
|
||||||
|
|
@ -353,12 +499,60 @@ let
|
||||||
}
|
}
|
||||||
// optionalAttrs (desiredSystem != null && desiredSystem.rollbackOnFailure != null) {
|
// optionalAttrs (desiredSystem != null && desiredSystem.rollbackOnFailure != null) {
|
||||||
rollback_on_failure = desiredSystem.rollbackOnFailure;
|
rollback_on_failure = desiredSystem.rollbackOnFailure;
|
||||||
|
}
|
||||||
|
// optionalAttrs (desiredSystem != null && desiredSystem.drainBeforeApply != null) {
|
||||||
|
drain_before_apply = desiredSystem.drainBeforeApply;
|
||||||
};
|
};
|
||||||
in
|
in
|
||||||
if desiredSystem == null || rendered == { } then null else {
|
if desiredSystem == null || rendered == { } then null else {
|
||||||
node_id = nodeName;
|
node_id = nodeName;
|
||||||
} // rendered;
|
} // rendered;
|
||||||
|
|
||||||
|
mkHostDeploymentSelector = selector:
|
||||||
|
{
|
||||||
|
node_ids = selector.nodeIds or [ ];
|
||||||
|
roles = selector.roles or [ ];
|
||||||
|
pools = selector.pools or [ ];
|
||||||
|
node_classes = selector.nodeClasses or [ ];
|
||||||
|
match_labels = selector.matchLabels or { };
|
||||||
|
};
|
||||||
|
|
||||||
|
mkDeployerHostDeploymentSpec = name: deployment:
|
||||||
|
{
|
||||||
|
inherit name;
|
||||||
|
selector = mkHostDeploymentSelector deployment.selector;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.nixosConfiguration != null) {
|
||||||
|
nixos_configuration = deployment.nixosConfiguration;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.flakeRef != null) {
|
||||||
|
flake_ref = deployment.flakeRef;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.batchSize != null) {
|
||||||
|
batch_size = deployment.batchSize;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.maxUnavailable != null) {
|
||||||
|
max_unavailable = deployment.maxUnavailable;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.healthCheckCommand != [ ]) {
|
||||||
|
health_check_command = deployment.healthCheckCommand;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.switchAction != null) {
|
||||||
|
switch_action = deployment.switchAction;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.rollbackOnFailure != null) {
|
||||||
|
rollback_on_failure = deployment.rollbackOnFailure;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.drainBeforeApply != null) {
|
||||||
|
drain_before_apply = deployment.drainBeforeApply;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.rebootPolicy != null) {
|
||||||
|
reboot_policy = deployment.rebootPolicy;
|
||||||
|
}
|
||||||
|
// optionalAttrs (deployment.paused != null) {
|
||||||
|
paused = deployment.paused;
|
||||||
|
};
|
||||||
|
|
||||||
mkDeployerNodeSpec = nodeName: node:
|
mkDeployerNodeSpec = nodeName: node:
|
||||||
{
|
{
|
||||||
node_id = nodeName;
|
node_id = nodeName;
|
||||||
|
|
@ -390,6 +584,18 @@ let
|
||||||
}
|
}
|
||||||
// optionalAttrs (node.state != null) {
|
// optionalAttrs (node.state != null) {
|
||||||
state = node.state;
|
state = node.state;
|
||||||
|
}
|
||||||
|
// optionalAttrs (node.commissionState != null) {
|
||||||
|
commission_state = node.commissionState;
|
||||||
|
}
|
||||||
|
// optionalAttrs (node.installState != null) {
|
||||||
|
install_state = node.installState;
|
||||||
|
}
|
||||||
|
// optionalAttrs (node.powerState != null) {
|
||||||
|
power_state = node.powerState;
|
||||||
|
}
|
||||||
|
// optionalAttrs (node.bmcRef != null) {
|
||||||
|
bmc_ref = node.bmcRef;
|
||||||
};
|
};
|
||||||
|
|
||||||
mkDeployerNodeClassSpec = name: nodeClass:
|
mkDeployerNodeClassSpec = name: nodeClass:
|
||||||
|
|
@ -522,6 +728,7 @@ let
|
||||||
nodeClasses = deployer.nodeClasses or { };
|
nodeClasses = deployer.nodeClasses or { };
|
||||||
pools = deployer.pools or { };
|
pools = deployer.pools or { };
|
||||||
enrollmentRules = deployer.enrollmentRules or { };
|
enrollmentRules = deployer.enrollmentRules or { };
|
||||||
|
hostDeployments = deployer.hostDeployments or { };
|
||||||
in {
|
in {
|
||||||
cluster = {
|
cluster = {
|
||||||
cluster_id = clusterId;
|
cluster_id = clusterId;
|
||||||
|
|
@ -532,6 +739,7 @@ let
|
||||||
node_classes = map (name: mkDeployerNodeClassSpec name nodeClasses.${name}) (attrNames nodeClasses);
|
node_classes = map (name: mkDeployerNodeClassSpec name nodeClasses.${name}) (attrNames nodeClasses);
|
||||||
pools = map (name: mkDeployerPoolSpec name pools.${name}) (attrNames pools);
|
pools = map (name: mkDeployerPoolSpec name pools.${name}) (attrNames pools);
|
||||||
enrollment_rules = map (name: mkDeployerEnrollmentRuleSpec name enrollmentRules.${name}) (attrNames enrollmentRules);
|
enrollment_rules = map (name: mkDeployerEnrollmentRuleSpec name enrollmentRules.${name}) (attrNames enrollmentRules);
|
||||||
|
host_deployments = map (name: mkDeployerHostDeploymentSpec name hostDeployments.${name}) (attrNames hostDeployments);
|
||||||
services = [ ];
|
services = [ ];
|
||||||
instances = [ ];
|
instances = [ ];
|
||||||
mtls_policies = [ ];
|
mtls_policies = [ ];
|
||||||
|
|
@ -541,6 +749,8 @@ in
|
||||||
inherit
|
inherit
|
||||||
mkInstallPlanType
|
mkInstallPlanType
|
||||||
mkDesiredSystemType
|
mkDesiredSystemType
|
||||||
|
mkHostDeploymentSelectorType
|
||||||
|
mkHostDeploymentType
|
||||||
mkNodeType
|
mkNodeType
|
||||||
mkNodeClassType
|
mkNodeClassType
|
||||||
mkNodePoolType
|
mkNodePoolType
|
||||||
|
|
|
||||||
|
|
@ -2,30 +2,112 @@
|
||||||
|
|
||||||
let
|
let
|
||||||
cfg = config.services.coronafs;
|
cfg = config.services.coronafs;
|
||||||
|
chainfireEnabled = lib.hasAttrByPath [ "services" "chainfire" "enable" ] config && config.services.chainfire.enable;
|
||||||
|
chainfireApiUrls =
|
||||||
|
if cfg.chainfireApiUrl != null then
|
||||||
|
lib.filter (item: item != "") (map lib.strings.trim (lib.splitString "," cfg.chainfireApiUrl))
|
||||||
|
else
|
||||||
|
[ ];
|
||||||
|
effectiveChainfireApiUrl =
|
||||||
|
if cfg.chainfireApiUrl != null then cfg.chainfireApiUrl
|
||||||
|
else if chainfireEnabled then "http://127.0.0.1:${toString config.services.chainfire.httpPort}"
|
||||||
|
else null;
|
||||||
|
localChainfireApiUrl =
|
||||||
|
lib.any
|
||||||
|
(url:
|
||||||
|
lib.hasPrefix "http://127.0.0.1:" url
|
||||||
|
|| lib.hasPrefix "http://localhost:" url
|
||||||
|
)
|
||||||
|
(
|
||||||
|
if effectiveChainfireApiUrl == null then
|
||||||
|
[ ]
|
||||||
|
else if cfg.chainfireApiUrl != null then
|
||||||
|
chainfireApiUrls
|
||||||
|
else
|
||||||
|
[ effectiveChainfireApiUrl ]
|
||||||
|
);
|
||||||
|
waitForChainfire =
|
||||||
|
pkgs.writeShellScript "coronafs-wait-for-chainfire" ''
|
||||||
|
set -eu
|
||||||
|
deadline=$((SECONDS + 60))
|
||||||
|
urls='${lib.concatStringsSep " " (
|
||||||
|
if effectiveChainfireApiUrl == null then
|
||||||
|
[ ]
|
||||||
|
else if cfg.chainfireApiUrl != null then
|
||||||
|
chainfireApiUrls
|
||||||
|
else
|
||||||
|
[ effectiveChainfireApiUrl ]
|
||||||
|
)}'
|
||||||
|
while true; do
|
||||||
|
for url in $urls; do
|
||||||
|
if curl -fsS "$url/health" >/dev/null 2>&1; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ "$SECONDS" -ge "$deadline" ]; then
|
||||||
|
echo "timed out waiting for ChainFire at ${if effectiveChainfireApiUrl == null then "(none)" else effectiveChainfireApiUrl}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
'';
|
||||||
tomlFormat = pkgs.formats.toml { };
|
tomlFormat = pkgs.formats.toml { };
|
||||||
coronafsConfigFile = tomlFormat.generate "coronafs.toml" {
|
coronafsConfigFile = tomlFormat.generate "coronafs.toml" (
|
||||||
listen_addr = "0.0.0.0:${toString cfg.port}";
|
{
|
||||||
advertise_host = cfg.advertiseHost;
|
mode = cfg.mode;
|
||||||
data_dir = toString cfg.dataDir;
|
metadata_backend = cfg.metadataBackend;
|
||||||
export_bind_addr = cfg.exportBindAddr;
|
chainfire_key_prefix = cfg.chainfireKeyPrefix;
|
||||||
export_base_port = cfg.exportBasePort;
|
listen_addr = "0.0.0.0:${toString cfg.port}";
|
||||||
export_port_count = cfg.exportPortCount;
|
advertise_host = cfg.advertiseHost;
|
||||||
export_shared_clients = cfg.exportSharedClients;
|
data_dir = toString cfg.dataDir;
|
||||||
export_cache_mode = cfg.exportCacheMode;
|
export_bind_addr = cfg.exportBindAddr;
|
||||||
export_aio_mode = cfg.exportAioMode;
|
export_base_port = cfg.exportBasePort;
|
||||||
export_discard_mode = cfg.exportDiscardMode;
|
export_port_count = cfg.exportPortCount;
|
||||||
export_detect_zeroes_mode = cfg.exportDetectZeroesMode;
|
export_shared_clients = cfg.exportSharedClients;
|
||||||
preallocate = cfg.preallocate;
|
export_cache_mode = cfg.exportCacheMode;
|
||||||
sync_on_write = cfg.syncOnWrite;
|
export_aio_mode = cfg.exportAioMode;
|
||||||
qemu_nbd_path = "${pkgs.qemu}/bin/qemu-nbd";
|
export_discard_mode = cfg.exportDiscardMode;
|
||||||
qemu_img_path = "${pkgs.qemu}/bin/qemu-img";
|
export_detect_zeroes_mode = cfg.exportDetectZeroesMode;
|
||||||
log_level = "info";
|
preallocate = cfg.preallocate;
|
||||||
};
|
sync_on_write = cfg.syncOnWrite;
|
||||||
|
qemu_nbd_path = "${pkgs.qemu}/bin/qemu-nbd";
|
||||||
|
qemu_img_path = "${pkgs.qemu}/bin/qemu-img";
|
||||||
|
log_level = "info";
|
||||||
|
}
|
||||||
|
// lib.optionalAttrs (effectiveChainfireApiUrl != null) {
|
||||||
|
chainfire_api_url = effectiveChainfireApiUrl;
|
||||||
|
}
|
||||||
|
);
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
options.services.coronafs = {
|
options.services.coronafs = {
|
||||||
enable = lib.mkEnableOption "CoronaFS block volume service";
|
enable = lib.mkEnableOption "CoronaFS block volume service";
|
||||||
|
|
||||||
|
mode = lib.mkOption {
|
||||||
|
type = lib.types.enum [ "combined" "controller" "node" ];
|
||||||
|
default = "combined";
|
||||||
|
description = "CoronaFS operating mode: combined compatibility mode, controller-only API, or node-local export mode.";
|
||||||
|
};
|
||||||
|
|
||||||
|
metadataBackend = lib.mkOption {
|
||||||
|
type = lib.types.enum [ "filesystem" "chainfire" ];
|
||||||
|
default = "filesystem";
|
||||||
|
description = "Metadata backend for CoronaFS volume metadata. Use chainfire on controller nodes to replicate volume metadata.";
|
||||||
|
};
|
||||||
|
|
||||||
|
chainfireApiUrl = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Optional ChainFire HTTP API URL used when metadataBackend = chainfire. Comma-separated endpoints are allowed for failover.";
|
||||||
|
example = "http://127.0.0.1:8081";
|
||||||
|
};
|
||||||
|
|
||||||
|
chainfireKeyPrefix = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "/coronafs/volumes";
|
||||||
|
description = "ChainFire key prefix used to store CoronaFS metadata when metadataBackend = chainfire.";
|
||||||
|
};
|
||||||
|
|
||||||
port = lib.mkOption {
|
port = lib.mkOption {
|
||||||
type = lib.types.port;
|
type = lib.types.port;
|
||||||
default = 50088;
|
default = 50088;
|
||||||
|
|
@ -71,7 +153,7 @@ in
|
||||||
|
|
||||||
exportAioMode = lib.mkOption {
|
exportAioMode = lib.mkOption {
|
||||||
type = lib.types.enum [ "native" "io_uring" "threads" ];
|
type = lib.types.enum [ "native" "io_uring" "threads" ];
|
||||||
default = "io_uring";
|
default = "threads";
|
||||||
description = "qemu-nbd AIO mode for CoronaFS exports.";
|
description = "qemu-nbd AIO mode for CoronaFS exports.";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -113,11 +195,22 @@ in
|
||||||
};
|
};
|
||||||
|
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf cfg.enable {
|
||||||
|
assertions = [
|
||||||
|
{
|
||||||
|
assertion = cfg.metadataBackend != "chainfire" || effectiveChainfireApiUrl != null;
|
||||||
|
message = "services.coronafs.metadataBackend = \"chainfire\" requires services.coronafs.chainfireApiUrl or a local services.chainfire instance.";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
users.users.coronafs = {
|
users.users.coronafs = {
|
||||||
isSystemUser = true;
|
isSystemUser = true;
|
||||||
group = "coronafs";
|
group = "coronafs";
|
||||||
description = "CoronaFS service user";
|
description = "CoronaFS service user";
|
||||||
home = cfg.dataDir;
|
home = cfg.dataDir;
|
||||||
|
extraGroups =
|
||||||
|
lib.optional
|
||||||
|
(lib.hasAttrByPath [ "services" "plasmavmc" "enable" ] config && config.services.plasmavmc.enable)
|
||||||
|
"plasmavmc";
|
||||||
};
|
};
|
||||||
|
|
||||||
users.groups.coronafs = { };
|
users.groups.coronafs = { };
|
||||||
|
|
@ -125,8 +218,9 @@ in
|
||||||
systemd.services.coronafs = {
|
systemd.services.coronafs = {
|
||||||
description = "CoronaFS Block Volume Service";
|
description = "CoronaFS Block Volume Service";
|
||||||
wantedBy = [ "multi-user.target" ];
|
wantedBy = [ "multi-user.target" ];
|
||||||
after = [ "network.target" ];
|
after = [ "network.target" ] ++ lib.optionals chainfireEnabled [ "chainfire.service" ];
|
||||||
path = [ pkgs.qemu pkgs.util-linux pkgs.procps pkgs.coreutils ];
|
wants = lib.optionals chainfireEnabled [ "chainfire.service" ];
|
||||||
|
path = [ pkgs.qemu pkgs.util-linux pkgs.procps pkgs.coreutils pkgs.curl ];
|
||||||
|
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
Type = "simple";
|
Type = "simple";
|
||||||
|
|
@ -138,13 +232,14 @@ in
|
||||||
StateDirectory = "coronafs";
|
StateDirectory = "coronafs";
|
||||||
StateDirectoryMode = "0750";
|
StateDirectoryMode = "0750";
|
||||||
ReadWritePaths = [ cfg.dataDir ];
|
ReadWritePaths = [ cfg.dataDir ];
|
||||||
|
ExecStartPre = lib.optionals (cfg.metadataBackend == "chainfire" && localChainfireApiUrl) [ waitForChainfire ];
|
||||||
ExecStart = "${cfg.package}/bin/coronafs-server --config ${coronafsConfigFile}";
|
ExecStart = "${cfg.package}/bin/coronafs-server --config ${coronafsConfigFile}";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
systemd.tmpfiles.rules = [
|
systemd.tmpfiles.rules = [
|
||||||
"d ${toString cfg.dataDir} 0750 coronafs coronafs -"
|
"d ${toString cfg.dataDir} 0750 coronafs coronafs -"
|
||||||
"d ${toString cfg.dataDir}/volumes 0750 coronafs coronafs -"
|
"d ${toString cfg.dataDir}/volumes 2770 coronafs coronafs -"
|
||||||
"d ${toString cfg.dataDir}/metadata 0750 coronafs coronafs -"
|
"d ${toString cfg.dataDir}/metadata 0750 coronafs coronafs -"
|
||||||
"d ${toString cfg.dataDir}/pids 0750 coronafs coronafs -"
|
"d ${toString cfg.dataDir}/pids 0750 coronafs coronafs -"
|
||||||
];
|
];
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,23 @@
|
||||||
let
|
let
|
||||||
cfg = config.services.deployer;
|
cfg = config.services.deployer;
|
||||||
tomlFormat = pkgs.formats.toml { };
|
tomlFormat = pkgs.formats.toml { };
|
||||||
|
usesLocalChainfire =
|
||||||
|
builtins.any
|
||||||
|
(
|
||||||
|
endpoint:
|
||||||
|
lib.hasPrefix "http://127.0.0.1:" endpoint
|
||||||
|
|| lib.hasPrefix "http://localhost:" endpoint
|
||||||
|
|| lib.hasPrefix "http://[::1]:" endpoint
|
||||||
|
)
|
||||||
|
cfg.chainfireEndpoints;
|
||||||
|
localChainfireDeps =
|
||||||
|
lib.optionals
|
||||||
|
(
|
||||||
|
usesLocalChainfire
|
||||||
|
&& lib.hasAttrByPath [ "services" "chainfire" "enable" ] config
|
||||||
|
&& config.services.chainfire.enable
|
||||||
|
)
|
||||||
|
[ "chainfire.service" ];
|
||||||
generatedConfig = {
|
generatedConfig = {
|
||||||
bind_addr = cfg.bindAddr;
|
bind_addr = cfg.bindAddr;
|
||||||
chainfire = {
|
chainfire = {
|
||||||
|
|
@ -226,7 +243,9 @@ in
|
||||||
systemd.services.deployer = {
|
systemd.services.deployer = {
|
||||||
description = "PlasmaCloud Deployer Server";
|
description = "PlasmaCloud Deployer Server";
|
||||||
wantedBy = [ "multi-user.target" ];
|
wantedBy = [ "multi-user.target" ];
|
||||||
after = [ "network.target" ];
|
wants = [ "network-online.target" ] ++ localChainfireDeps;
|
||||||
|
after = [ "network-online.target" ] ++ localChainfireDeps;
|
||||||
|
requires = localChainfireDeps;
|
||||||
|
|
||||||
environment = {}
|
environment = {}
|
||||||
// lib.optionalAttrs (cfg.bootstrapToken != null) {
|
// lib.optionalAttrs (cfg.bootstrapToken != null) {
|
||||||
|
|
|
||||||
|
|
@ -285,7 +285,7 @@ in
|
||||||
healthUrl = "http://localhost:8082/health"; # Health endpoint on admin port
|
healthUrl = "http://localhost:8082/health"; # Health endpoint on admin port
|
||||||
leaderUrlKey = "flaredb_leader_url";
|
leaderUrlKey = "flaredb_leader_url";
|
||||||
defaultLeaderUrl = "http://localhost:8082";
|
defaultLeaderUrl = "http://localhost:8082";
|
||||||
joinPath = null;
|
joinPath = "/admin/member/add";
|
||||||
port = cfg.flaredbPort;
|
port = cfg.flaredbPort;
|
||||||
description = "FlareDB";
|
description = "FlareDB";
|
||||||
} // {
|
} // {
|
||||||
|
|
|
||||||
|
|
@ -297,6 +297,30 @@ in
|
||||||
description = "Prometheus metrics port for lightningstor-node.";
|
description = "Prometheus metrics port for lightningstor-node.";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
s3StreamingPutThresholdBytes = lib.mkOption {
|
||||||
|
type = lib.types.int;
|
||||||
|
default = 64 * 1024 * 1024;
|
||||||
|
description = "Streaming PUT multipart threshold for the S3 frontend.";
|
||||||
|
};
|
||||||
|
|
||||||
|
s3InlinePutMaxBytes = lib.mkOption {
|
||||||
|
type = lib.types.int;
|
||||||
|
default = 128 * 1024 * 1024;
|
||||||
|
description = "Maximum inline single-PUT size for the S3 frontend.";
|
||||||
|
};
|
||||||
|
|
||||||
|
s3MultipartPutConcurrency = lib.mkOption {
|
||||||
|
type = lib.types.int;
|
||||||
|
default = 4;
|
||||||
|
description = "Maximum in-flight multipart PUT part uploads.";
|
||||||
|
};
|
||||||
|
|
||||||
|
s3MultipartFetchConcurrency = lib.mkOption {
|
||||||
|
type = lib.types.int;
|
||||||
|
default = 4;
|
||||||
|
description = "Maximum concurrent multipart GET part fetches.";
|
||||||
|
};
|
||||||
|
|
||||||
databaseUrl = lib.mkOption {
|
databaseUrl = lib.mkOption {
|
||||||
type = lib.types.nullOr lib.types.str;
|
type = lib.types.nullOr lib.types.str;
|
||||||
default = null;
|
default = null;
|
||||||
|
|
@ -369,6 +393,14 @@ in
|
||||||
|
|
||||||
environment = {
|
environment = {
|
||||||
RUST_LOG = "info";
|
RUST_LOG = "info";
|
||||||
|
LIGHTNINGSTOR_S3_STREAMING_PUT_THRESHOLD_BYTES =
|
||||||
|
toString cfg.s3StreamingPutThresholdBytes;
|
||||||
|
LIGHTNINGSTOR_S3_INLINE_PUT_MAX_BYTES =
|
||||||
|
toString cfg.s3InlinePutMaxBytes;
|
||||||
|
LIGHTNINGSTOR_S3_MULTIPART_PUT_CONCURRENCY =
|
||||||
|
toString cfg.s3MultipartPutConcurrency;
|
||||||
|
LIGHTNINGSTOR_S3_MULTIPART_FETCH_CONCURRENCY =
|
||||||
|
toString cfg.s3MultipartFetchConcurrency;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ let
|
||||||
nodeClassType = clusterConfigLib.mkNodeClassType types;
|
nodeClassType = clusterConfigLib.mkNodeClassType types;
|
||||||
nodePoolType = clusterConfigLib.mkNodePoolType types;
|
nodePoolType = clusterConfigLib.mkNodePoolType types;
|
||||||
enrollmentRuleType = clusterConfigLib.mkEnrollmentRuleType types;
|
enrollmentRuleType = clusterConfigLib.mkEnrollmentRuleType types;
|
||||||
|
hostDeploymentType = clusterConfigLib.mkHostDeploymentType types;
|
||||||
jsonFormat = pkgs.formats.json { };
|
jsonFormat = pkgs.formats.json { };
|
||||||
|
|
||||||
# Generate cluster-config.json for the current node
|
# Generate cluster-config.json for the current node
|
||||||
|
|
@ -98,6 +99,12 @@ in {
|
||||||
default = { };
|
default = { };
|
||||||
description = "Deployer auto-enrollment rules derived from Nix";
|
description = "Deployer auto-enrollment rules derived from Nix";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
hostDeployments = mkOption {
|
||||||
|
type = types.attrsOf hostDeploymentType;
|
||||||
|
default = { };
|
||||||
|
description = "Declarative host rollout objects derived from Nix";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
generated = {
|
generated = {
|
||||||
|
|
@ -173,6 +180,16 @@ in {
|
||||||
) (attrNames cfg.deployer.enrollmentRules);
|
) (attrNames cfg.deployer.enrollmentRules);
|
||||||
message = "All deployer enrollment rules must reference existing pools and node classes";
|
message = "All deployer enrollment rules must reference existing pools and node classes";
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
assertion = all (deploymentName:
|
||||||
|
let
|
||||||
|
deployment = cfg.deployer.hostDeployments.${deploymentName};
|
||||||
|
in
|
||||||
|
all (pool: cfg.deployer.pools ? "${pool}") deployment.selector.pools
|
||||||
|
&& all (nodeClass: cfg.deployer.nodeClasses ? "${nodeClass}") deployment.selector.nodeClasses
|
||||||
|
) (attrNames cfg.deployer.hostDeployments);
|
||||||
|
message = "All deployer host deployments must reference existing pools and node classes";
|
||||||
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
# Generate cluster-config.json for first-boot-automation
|
# Generate cluster-config.json for first-boot-automation
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,30 @@
|
||||||
|
|
||||||
let
|
let
|
||||||
cfg = config.services.plasmavmc;
|
cfg = config.services.plasmavmc;
|
||||||
|
localIamDeps = lib.optional (config.services.iam.enable or false) "iam.service";
|
||||||
|
localIamHealthUrl =
|
||||||
|
if config.services.iam.enable or false
|
||||||
|
then "http://127.0.0.1:${toString config.services.iam.httpPort}/health"
|
||||||
|
else null;
|
||||||
|
remoteIamEndpoint =
|
||||||
|
if !(config.services.iam.enable or false) && cfg.iamAddr != null
|
||||||
|
then cfg.iamAddr
|
||||||
|
else null;
|
||||||
coronafsEnabled = lib.hasAttrByPath [ "services" "coronafs" "enable" ] config && config.services.coronafs.enable;
|
coronafsEnabled = lib.hasAttrByPath [ "services" "coronafs" "enable" ] config && config.services.coronafs.enable;
|
||||||
coronafsDataDir =
|
coronafsDataDir =
|
||||||
if coronafsEnabled && lib.hasAttrByPath [ "services" "coronafs" "dataDir" ] config
|
if coronafsEnabled && lib.hasAttrByPath [ "services" "coronafs" "dataDir" ] config
|
||||||
then toString config.services.coronafs.dataDir
|
then toString config.services.coronafs.dataDir
|
||||||
else null;
|
else null;
|
||||||
|
effectiveCoronafsControllerEndpoint =
|
||||||
|
if cfg.coronafsControllerEndpoint != null then cfg.coronafsControllerEndpoint
|
||||||
|
else if cfg.coronafsEndpoint != null then cfg.coronafsEndpoint
|
||||||
|
else if coronafsEnabled then "http://127.0.0.1:${toString config.services.coronafs.port}"
|
||||||
|
else null;
|
||||||
|
effectiveCoronafsNodeEndpoint =
|
||||||
|
if cfg.coronafsNodeEndpoint != null then cfg.coronafsNodeEndpoint
|
||||||
|
else if coronafsEnabled then "http://127.0.0.1:${toString config.services.coronafs.port}"
|
||||||
|
else if cfg.coronafsEndpoint != null then cfg.coronafsEndpoint
|
||||||
|
else null;
|
||||||
tomlFormat = pkgs.formats.toml { };
|
tomlFormat = pkgs.formats.toml { };
|
||||||
plasmavmcConfigFile = tomlFormat.generate "plasmavmc.toml" {
|
plasmavmcConfigFile = tomlFormat.generate "plasmavmc.toml" {
|
||||||
addr = "0.0.0.0:${toString cfg.port}";
|
addr = "0.0.0.0:${toString cfg.port}";
|
||||||
|
|
@ -94,10 +113,41 @@ in
|
||||||
coronafsEndpoint = lib.mkOption {
|
coronafsEndpoint = lib.mkOption {
|
||||||
type = lib.types.nullOr lib.types.str;
|
type = lib.types.nullOr lib.types.str;
|
||||||
default = null;
|
default = null;
|
||||||
description = "CoronaFS HTTP endpoint used to provision and export managed VM volumes.";
|
description = "Deprecated combined CoronaFS HTTP endpoint used to provision and export managed VM volumes.";
|
||||||
example = "http://10.0.0.11:50088";
|
example = "http://10.0.0.11:50088";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
coronafsControllerEndpoint = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.str;
|
||||||
|
default = null;
|
||||||
|
description = "CoronaFS controller HTTP endpoint used to provision and resize managed VM volumes. Comma-separated endpoints are allowed for client-side failover.";
|
||||||
|
example = "http://10.0.0.11:50088";
|
||||||
|
};
|
||||||
|
|
||||||
|
coronafsNodeEndpoint = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.str;
|
||||||
|
default = null;
|
||||||
|
description = "CoronaFS node-local HTTP endpoint used to resolve local paths and exports for attached VM volumes. Comma-separated endpoints are allowed for client-side failover.";
|
||||||
|
example = "http://127.0.0.1:50088";
|
||||||
|
};
|
||||||
|
|
||||||
|
coronafsNodeLocalAttach = lib.mkOption {
|
||||||
|
type = lib.types.bool;
|
||||||
|
default = false;
|
||||||
|
description = ''
|
||||||
|
Enable writable VM attachment through node-local CoronaFS materialization.
|
||||||
|
This requires services.plasmavmc.sharedLiveMigration = false because migrations use cold relocate plus flush-back.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
experimentalCoronafsNodeLocalAttach = lib.mkOption {
|
||||||
|
type = lib.types.bool;
|
||||||
|
default = false;
|
||||||
|
description = ''
|
||||||
|
Deprecated alias for services.plasmavmc.coronafsNodeLocalAttach.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
managedVolumeRoot = lib.mkOption {
|
managedVolumeRoot = lib.mkOption {
|
||||||
type = lib.types.path;
|
type = lib.types.path;
|
||||||
default = "/var/lib/plasmavmc/managed-volumes";
|
default = "/var/lib/plasmavmc/managed-volumes";
|
||||||
|
|
@ -173,6 +223,24 @@ in
|
||||||
};
|
};
|
||||||
|
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf cfg.enable {
|
||||||
|
assertions = [
|
||||||
|
{
|
||||||
|
assertion = !((cfg.coronafsNodeLocalAttach || cfg.experimentalCoronafsNodeLocalAttach) && cfg.sharedLiveMigration);
|
||||||
|
message = ''
|
||||||
|
services.plasmavmc.coronafsNodeLocalAttach requires services.plasmavmc.sharedLiveMigration = false
|
||||||
|
because writable node-local CoronaFS attachment uses cold relocate plus flush-back instead of shared-storage live migration.
|
||||||
|
'';
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
warnings =
|
||||||
|
lib.optional (cfg.coronafsEndpoint != null) ''
|
||||||
|
services.plasmavmc.coronafsEndpoint is deprecated; use services.plasmavmc.coronafsControllerEndpoint and services.plasmavmc.coronafsNodeEndpoint.
|
||||||
|
''
|
||||||
|
++ lib.optional (cfg.experimentalCoronafsNodeLocalAttach) ''
|
||||||
|
services.plasmavmc.experimentalCoronafsNodeLocalAttach is deprecated; use services.plasmavmc.coronafsNodeLocalAttach.
|
||||||
|
'';
|
||||||
|
|
||||||
# Create system user
|
# Create system user
|
||||||
users.users.plasmavmc = {
|
users.users.plasmavmc = {
|
||||||
isSystemUser = true;
|
isSystemUser = true;
|
||||||
|
|
@ -188,9 +256,35 @@ in
|
||||||
systemd.services.plasmavmc = {
|
systemd.services.plasmavmc = {
|
||||||
description = "PlasmaVMC Virtual Machine Compute Service";
|
description = "PlasmaVMC Virtual Machine Compute Service";
|
||||||
wantedBy = [ "multi-user.target" ];
|
wantedBy = [ "multi-user.target" ];
|
||||||
after = [ "network.target" "prismnet.service" "flaredb.service" "chainfire.service" ];
|
after = [ "network-online.target" "prismnet.service" "flaredb.service" "chainfire.service" ] ++ localIamDeps;
|
||||||
wants = [ "prismnet.service" "flaredb.service" "chainfire.service" ];
|
wants = [ "network-online.target" "prismnet.service" "flaredb.service" "chainfire.service" ] ++ localIamDeps;
|
||||||
path = [ pkgs.qemu pkgs.coreutils ];
|
path = [ pkgs.qemu pkgs.coreutils pkgs.curl ];
|
||||||
|
preStart =
|
||||||
|
lib.optionalString (localIamHealthUrl != null) ''
|
||||||
|
for _ in $(seq 1 90); do
|
||||||
|
if curl -fsS ${lib.escapeShellArg localIamHealthUrl} >/dev/null 2>&1; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
echo "plasmavmc: timed out waiting for local IAM health at ${localIamHealthUrl}" >&2
|
||||||
|
exit 1
|
||||||
|
''
|
||||||
|
+ lib.optionalString (remoteIamEndpoint != null) ''
|
||||||
|
endpoint=${lib.escapeShellArg remoteIamEndpoint}
|
||||||
|
endpoint="''${endpoint#http://}"
|
||||||
|
endpoint="''${endpoint#https://}"
|
||||||
|
host="''${endpoint%:*}"
|
||||||
|
port="''${endpoint##*:}"
|
||||||
|
for _ in $(${pkgs.coreutils}/bin/seq 1 90); do
|
||||||
|
if ${pkgs.coreutils}/bin/timeout 1 ${pkgs.bash}/bin/bash -lc "</dev/tcp/''${host}/''${port}" >/dev/null 2>&1; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
echo "plasmavmc: timed out waiting for IAM gRPC at ''${host}:''${port}" >&2
|
||||||
|
exit 1
|
||||||
|
'';
|
||||||
|
|
||||||
environment = lib.mkMerge [
|
environment = lib.mkMerge [
|
||||||
{
|
{
|
||||||
|
|
@ -213,6 +307,16 @@ in
|
||||||
(lib.mkIf (cfg.lightningstorAddr != null) {
|
(lib.mkIf (cfg.lightningstorAddr != null) {
|
||||||
PLASMAVMC_LIGHTNINGSTOR_ENDPOINT = cfg.lightningstorAddr;
|
PLASMAVMC_LIGHTNINGSTOR_ENDPOINT = cfg.lightningstorAddr;
|
||||||
})
|
})
|
||||||
|
(lib.mkIf (effectiveCoronafsControllerEndpoint != null) {
|
||||||
|
PLASMAVMC_CORONAFS_CONTROLLER_ENDPOINT = effectiveCoronafsControllerEndpoint;
|
||||||
|
})
|
||||||
|
(lib.mkIf (effectiveCoronafsNodeEndpoint != null) {
|
||||||
|
PLASMAVMC_CORONAFS_NODE_ENDPOINT = effectiveCoronafsNodeEndpoint;
|
||||||
|
})
|
||||||
|
(lib.mkIf (cfg.coronafsNodeLocalAttach || cfg.experimentalCoronafsNodeLocalAttach) {
|
||||||
|
PLASMAVMC_CORONAFS_NODE_LOCAL_ATTACH = "1";
|
||||||
|
PLASMAVMC_CORONAFS_ENABLE_EXPERIMENTAL_NODE_LOCAL_ATTACH = "1";
|
||||||
|
})
|
||||||
(lib.mkIf (cfg.coronafsEndpoint != null) {
|
(lib.mkIf (cfg.coronafsEndpoint != null) {
|
||||||
PLASMAVMC_CORONAFS_ENDPOINT = cfg.coronafsEndpoint;
|
PLASMAVMC_CORONAFS_ENDPOINT = cfg.coronafsEndpoint;
|
||||||
})
|
})
|
||||||
|
|
@ -273,6 +377,8 @@ in
|
||||||
systemd.tmpfiles.rules = [
|
systemd.tmpfiles.rules = [
|
||||||
"d ${builtins.dirOf (toString cfg.managedVolumeRoot)} 0755 plasmavmc plasmavmc -"
|
"d ${builtins.dirOf (toString cfg.managedVolumeRoot)} 0755 plasmavmc plasmavmc -"
|
||||||
"d ${toString cfg.managedVolumeRoot} 0750 plasmavmc plasmavmc -"
|
"d ${toString cfg.managedVolumeRoot} 0750 plasmavmc plasmavmc -"
|
||||||
|
] ++ lib.optionals coronafsEnabled [
|
||||||
|
"d ${toString cfg.dataDir}/images 2770 plasmavmc coronafs -"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -108,6 +108,19 @@
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
hostDeployments = {
|
||||||
|
control-plane-canary = {
|
||||||
|
selector.nodeIds = [ "node01" ];
|
||||||
|
nixosConfiguration = "node01";
|
||||||
|
flakeRef = "github:centra/cloud";
|
||||||
|
batchSize = 1;
|
||||||
|
maxUnavailable = 1;
|
||||||
|
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
|
||||||
|
switchAction = "switch";
|
||||||
|
rollbackOnFailure = true;
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
bootstrap.initialPeers = [ "node01" "node02" "node03" ];
|
bootstrap.initialPeers = [ "node01" "node02" "node03" ];
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,8 @@
|
||||||
services.iam = {
|
services.iam = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50080;
|
port = 50080;
|
||||||
chainfireAddr = "192.168.100.11:2379";
|
chainfireAddr = "192.168.100.11:2379,192.168.100.12:2379,192.168.100.13:2379";
|
||||||
flaredbAddr = "192.168.100.11:2479";
|
flaredbAddr = "192.168.100.11:2479,192.168.100.12:2479,192.168.100.13:2479";
|
||||||
};
|
};
|
||||||
|
|
||||||
services.openssh.enable = true;
|
services.openssh.enable = true;
|
||||||
|
|
|
||||||
|
|
@ -42,8 +42,8 @@
|
||||||
services.iam = {
|
services.iam = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50080;
|
port = 50080;
|
||||||
chainfireAddr = "192.168.100.11:2379";
|
chainfireAddr = "192.168.100.11:2379,192.168.100.12:2379,192.168.100.13:2379";
|
||||||
flaredbAddr = "192.168.100.11:2479";
|
flaredbAddr = "192.168.100.11:2479,192.168.100.12:2479,192.168.100.13:2479";
|
||||||
};
|
};
|
||||||
|
|
||||||
services.openssh.enable = true;
|
services.openssh.enable = true;
|
||||||
|
|
|
||||||
|
|
@ -42,8 +42,8 @@
|
||||||
services.iam = {
|
services.iam = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50080;
|
port = 50080;
|
||||||
chainfireAddr = "192.168.100.11:2379";
|
chainfireAddr = "192.168.100.11:2379,192.168.100.12:2379,192.168.100.13:2379";
|
||||||
flaredbAddr = "192.168.100.11:2479";
|
flaredbAddr = "192.168.100.11:2479,192.168.100.12:2479,192.168.100.13:2479";
|
||||||
};
|
};
|
||||||
|
|
||||||
services.openssh.enable = true;
|
services.openssh.enable = true;
|
||||||
|
|
|
||||||
|
|
@ -63,10 +63,13 @@ Preferred entrypoint for publishable verification: `nix run ./nix/test-cluster#c
|
||||||
|
|
||||||
Preferred entrypoint for publishable matrix verification: `nix run ./nix/test-cluster#cluster -- fresh-matrix`
|
Preferred entrypoint for publishable matrix verification: `nix run ./nix/test-cluster#cluster -- fresh-matrix`
|
||||||
|
|
||||||
`nix run ./nix/test-cluster#cluster -- bench-storage` benchmarks CoronaFS local-vs-shared-volume I/O, queued random-read behavior, cross-worker direct-I/O shared-volume reads, and LightningStor large/small-object S3 throughput and writes a report to `docs/storage-benchmarks.md`.
|
`nix run ./nix/test-cluster#cluster -- bench-storage` benchmarks CoronaFS controller-export vs node-local-export I/O, worker-side materialization latency, and LightningStor large/small-object S3 throughput, then writes a report to `docs/storage-benchmarks.md`.
|
||||||
|
|
||||||
Preferred entrypoint for publishable storage numbers: `nix run ./nix/test-cluster#cluster -- fresh-storage-bench`
|
Preferred entrypoint for publishable storage numbers: `nix run ./nix/test-cluster#cluster -- fresh-storage-bench`
|
||||||
|
|
||||||
|
`nix run ./nix/test-cluster#cluster -- bench-coronafs-local-matrix` runs the local single-process CoronaFS export benchmark across the supported `cache`/`aio` combinations so software-path regressions can be separated from VM-lab network limits.
|
||||||
|
On the current lab hosts, `cache=none` with `aio=io_uring` is the strongest local-export profile and should be treated as the reference point when CoronaFS remote numbers are being distorted by the nested-QEMU/VDE network path.
|
||||||
|
|
||||||
## Advanced usage
|
## Advanced usage
|
||||||
|
|
||||||
Use the script entrypoint only for local debugging inside a prepared Nix shell:
|
Use the script entrypoint only for local debugging inside a prepared Nix shell:
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,18 @@ in
|
||||||
default = "/tmp/photoncloud-test-cluster-vde.sock";
|
default = "/tmp/photoncloud-test-cluster-vde.sock";
|
||||||
description = "VDE control socket path used for the east-west cluster NIC.";
|
description = "VDE control socket path used for the east-west cluster NIC.";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
chainfireControlPlaneAddrs = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "10.100.0.11:2379,10.100.0.12:2379,10.100.0.13:2379";
|
||||||
|
description = "Comma-separated ChainFire client endpoints for multi-endpoint failover.";
|
||||||
|
};
|
||||||
|
|
||||||
|
flaredbControlPlaneAddrs = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "10.100.0.11:2479,10.100.0.12:2479,10.100.0.13:2479";
|
||||||
|
description = "Comma-separated FlareDB client endpoints for multi-endpoint failover.";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
|
|
@ -84,10 +96,43 @@ in
|
||||||
|
|
||||||
system.stateVersion = "24.05";
|
system.stateVersion = "24.05";
|
||||||
|
|
||||||
|
systemd.services.photon-test-cluster-net-tuning = {
|
||||||
|
description = "Tune cluster NIC offloads for nested-QEMU storage tests";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network-online.target" ];
|
||||||
|
wants = [ "network-online.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
RemainAfterExit = true;
|
||||||
|
};
|
||||||
|
path = [ pkgs.ethtool pkgs.iproute2 pkgs.coreutils ];
|
||||||
|
script = ''
|
||||||
|
set -eu
|
||||||
|
iface="eth1"
|
||||||
|
for _ in $(seq 1 30); do
|
||||||
|
if ip link show "$iface" >/dev/null 2>&1; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
if ! ip link show "$iface" >/dev/null 2>&1; then
|
||||||
|
echo "photon-test-cluster-net-tuning: $iface not present, skipping" >&2
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Nested QEMU over VDE is sensitive to guest-side offloads; disabling
|
||||||
|
# them reduces retransmits and keeps the storage benchmarks closer to
|
||||||
|
# raw TCP throughput.
|
||||||
|
ethtool -K "$iface" tso off gso off gro off tx off rx off sg off || true
|
||||||
|
ip link set dev "$iface" txqueuelen 10000 || true
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
awscli2
|
awscli2
|
||||||
curl
|
curl
|
||||||
dnsutils
|
dnsutils
|
||||||
|
ethtool
|
||||||
fio
|
fio
|
||||||
jq
|
jq
|
||||||
grpcurl
|
grpcurl
|
||||||
|
|
|
||||||
|
|
@ -115,12 +115,17 @@
|
||||||
curl
|
curl
|
||||||
grpcurl
|
grpcurl
|
||||||
jq
|
jq
|
||||||
|
llvmPackages.clang
|
||||||
|
llvmPackages.libclang
|
||||||
openssh
|
openssh
|
||||||
|
protobuf
|
||||||
clusterPython
|
clusterPython
|
||||||
qemu
|
qemu
|
||||||
sshpass
|
sshpass
|
||||||
vde2
|
vde2
|
||||||
];
|
];
|
||||||
|
LIBCLANG_PATH = "${pkgs.llvmPackages.libclang.lib}/lib";
|
||||||
|
PROTOC = "${pkgs.protobuf}/bin/protoc";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -69,29 +69,29 @@
|
||||||
services.iam = {
|
services.iam = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50080;
|
port = 50080;
|
||||||
chainfireAddr = "10.100.0.11:2379";
|
chainfireAddr = config.photonTestCluster.chainfireControlPlaneAddrs;
|
||||||
flaredbAddr = "10.100.0.11:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
services.prismnet = {
|
services.prismnet = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50081;
|
port = 50081;
|
||||||
iamAddr = "10.100.0.11:50080";
|
iamAddr = "10.100.0.11:50080";
|
||||||
flaredbAddr = "10.100.0.11:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
services.flashdns = {
|
services.flashdns = {
|
||||||
enable = true;
|
enable = true;
|
||||||
iamAddr = "10.100.0.11:50080";
|
iamAddr = "10.100.0.11:50080";
|
||||||
flaredbAddr = "10.100.0.11:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
services.fiberlb = {
|
services.fiberlb = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50085;
|
port = 50085;
|
||||||
iamAddr = "10.100.0.11:50080";
|
iamAddr = "10.100.0.11:50080";
|
||||||
chainfireAddr = "10.100.0.11:2379";
|
chainfireAddr = config.photonTestCluster.chainfireControlPlaneAddrs;
|
||||||
flaredbAddr = "10.100.0.11:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
services.plasmavmc = {
|
services.plasmavmc = {
|
||||||
|
|
@ -101,14 +101,17 @@
|
||||||
httpPort = 8084;
|
httpPort = 8084;
|
||||||
prismnetAddr = "10.100.0.11:50081";
|
prismnetAddr = "10.100.0.11:50081";
|
||||||
iamAddr = "10.100.0.11:50080";
|
iamAddr = "10.100.0.11:50080";
|
||||||
chainfireAddr = "10.100.0.11:2379";
|
chainfireAddr = config.photonTestCluster.chainfireControlPlaneAddrs;
|
||||||
flaredbAddr = "10.100.0.11:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
lightningstorAddr = "10.100.0.11:50086";
|
lightningstorAddr = "10.100.0.11:50086";
|
||||||
coronafsEndpoint = "http://10.100.0.11:50088";
|
coronafsControllerEndpoint = "http://127.0.0.1:50088";
|
||||||
|
coronafsNodeEndpoint = "http://127.0.0.1:50088";
|
||||||
};
|
};
|
||||||
|
|
||||||
services.coronafs = {
|
services.coronafs = {
|
||||||
enable = true;
|
enable = true;
|
||||||
|
metadataBackend = "chainfire";
|
||||||
|
chainfireKeyPrefix = "/coronafs/test-cluster/control/volumes";
|
||||||
port = 50088;
|
port = 50088;
|
||||||
advertiseHost = "10.100.0.11";
|
advertiseHost = "10.100.0.11";
|
||||||
exportBasePort = 11000;
|
exportBasePort = 11000;
|
||||||
|
|
@ -138,9 +141,9 @@
|
||||||
readQuorum = 1;
|
readQuorum = 1;
|
||||||
writeQuorum = 2;
|
writeQuorum = 2;
|
||||||
nodeMetricsPort = 9198;
|
nodeMetricsPort = 9198;
|
||||||
chainfireAddr = "10.100.0.11:2379";
|
chainfireAddr = config.photonTestCluster.chainfireControlPlaneAddrs;
|
||||||
iamAddr = "10.100.0.11:50080";
|
iamAddr = "10.100.0.11:50080";
|
||||||
flaredbAddr = "10.100.0.11:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
zone = "zone-a";
|
zone = "zone-a";
|
||||||
region = "test";
|
region = "test";
|
||||||
};
|
};
|
||||||
|
|
@ -149,10 +152,10 @@
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50087;
|
port = 50087;
|
||||||
iamAddr = "http://10.100.0.11:50080";
|
iamAddr = "http://10.100.0.11:50080";
|
||||||
chainfireAddr = "http://10.100.0.11:2379";
|
chainfireAddr = "http://${config.photonTestCluster.chainfireControlPlaneAddrs}";
|
||||||
prismnetAddr = "http://10.100.0.11:50081";
|
prismnetAddr = "http://10.100.0.11:50081";
|
||||||
flaredbPdAddr = "10.100.0.11:2379";
|
flaredbPdAddr = config.photonTestCluster.chainfireControlPlaneAddrs;
|
||||||
flaredbDirectAddr = "10.100.0.11:2479";
|
flaredbDirectAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
fiberlbAddr = "http://10.100.0.11:50085";
|
fiberlbAddr = "http://10.100.0.11:50085";
|
||||||
flashdnsAddr = "http://10.100.0.11:50084";
|
flashdnsAddr = "http://10.100.0.11:50084";
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,6 @@
|
||||||
nodeId = "node02";
|
nodeId = "node02";
|
||||||
raftAddr = "10.100.0.12:2480";
|
raftAddr = "10.100.0.12:2480";
|
||||||
apiAddr = "10.100.0.12:2479";
|
apiAddr = "10.100.0.12:2479";
|
||||||
pdAddr = "10.100.0.11:2379";
|
|
||||||
initialPeers = [
|
initialPeers = [
|
||||||
"node01=10.100.0.11:2479"
|
"node01=10.100.0.11:2479"
|
||||||
"node02=10.100.0.12:2479"
|
"node02=10.100.0.12:2479"
|
||||||
|
|
@ -63,8 +62,8 @@
|
||||||
services.iam = {
|
services.iam = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50080;
|
port = 50080;
|
||||||
chainfireAddr = "10.100.0.12:2379";
|
chainfireAddr = config.photonTestCluster.chainfireControlPlaneAddrs;
|
||||||
flaredbAddr = "10.100.0.12:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
systemd.services.iam.environment = {
|
systemd.services.iam.environment = {
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,6 @@
|
||||||
nodeId = "node03";
|
nodeId = "node03";
|
||||||
raftAddr = "10.100.0.13:2480";
|
raftAddr = "10.100.0.13:2480";
|
||||||
apiAddr = "10.100.0.13:2479";
|
apiAddr = "10.100.0.13:2479";
|
||||||
pdAddr = "10.100.0.11:2379";
|
|
||||||
initialPeers = [
|
initialPeers = [
|
||||||
"node01=10.100.0.11:2479"
|
"node01=10.100.0.11:2479"
|
||||||
"node02=10.100.0.12:2479"
|
"node02=10.100.0.12:2479"
|
||||||
|
|
@ -63,8 +62,8 @@
|
||||||
services.iam = {
|
services.iam = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 50080;
|
port = 50080;
|
||||||
chainfireAddr = "10.100.0.13:2379";
|
chainfireAddr = config.photonTestCluster.chainfireControlPlaneAddrs;
|
||||||
flaredbAddr = "10.100.0.13:2479";
|
flaredbAddr = config.photonTestCluster.flaredbControlPlaneAddrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
systemd.services.iam.environment = {
|
systemd.services.iam.environment = {
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue