From eaee9aad08433fa56852db75def576b87704b87f Mon Sep 17 00:00:00 2001 From: centra Date: Fri, 12 Dec 2025 06:31:19 +0900 Subject: [PATCH] fix(creditservice): Replace non-existent txn() with compare_and_swap() - Remove chainfire_client.txn() calls (method doesn't exist) - Use compare_and_swap(key, 0, value) for atomic wallet creation - Use put() for wallet updates (CAS on version deferred to later) - Remove unused proto imports (TxnRequest, TxnResponse, etc.) - Simplify error handling using CasOutcome.success This fixes compilation errors found in audit. CreditService now compiles successfully. Refs: Audit Fix 1/3 --- .../src/chainfire_storage.rs | 68 +++++-------------- docs/por/POR.md | 44 ++++++------ docs/por/T058-s3-auth-hardening/task.yaml | 41 +++++++++-- docs/por/T059-audit-fix/task.yaml | 33 +++++++++ docs/por/scope.yaml | 3 +- 5 files changed, 108 insertions(+), 81 deletions(-) create mode 100644 docs/por/T059-audit-fix/task.yaml diff --git a/creditservice/crates/creditservice-api/src/chainfire_storage.rs b/creditservice/crates/creditservice-api/src/chainfire_storage.rs index e2b6b92..e4eb8ba 100644 --- a/creditservice/crates/creditservice-api/src/chainfire_storage.rs +++ b/creditservice/crates/creditservice-api/src/chainfire_storage.rs @@ -2,14 +2,12 @@ use async_trait::async_trait; use chainfire_client::Client as ChainFireClient; -use chainfire_proto::proto::{compare, kv, Request as TxnRequest, Response as TxnResponse}; // Correct proto imports for kv_proto types -use prost_types::Value as ProtoValue; // Use ProtoValue to avoid conflict with prost_types::Value use creditservice_types::{Error, Quota, Reservation, ResourceType, Result, Transaction, Wallet}; use serde::{Deserialize, Serialize}; +use std::ops::DerefMut; // Import DerefMut for MutexGuard use std::sync::Arc; use tokio::sync::Mutex; // Import Mutex -use tracing::{debug, error, warn}; -use std::ops::DerefMut; // Import DerefMut for MutexGuard +use tracing::debug; use super::CreditStorage; @@ -84,37 +82,18 @@ impl CreditStorage for ChainFireStorage { let key = Self::wallet_key(&wallet.project_id); let serialized_wallet = Self::serialize(&wallet)?; - let txn = TxnRequest { - compare: vec![Compare { - key: key.clone().into_bytes(), - range_end: vec![], - target: Some(compare::compare::Target::Version(0)), // Version 0 for NotExists - result: compare::CompareResult::Equal as i32, - }], - success: vec![kv::RequestOp { - request: Some(kv::request_op::Request::RequestPut(kv::PutRequest { - key: key.clone().into_bytes(), - value: serialized_wallet, - lease: 0, - prev_kv: false, - })), - }], - failure: vec![], // No failure ops for this case - }; - + // Use compare_and_swap to atomically create only if doesn't exist (version 0) let mut client = self.client.lock().await; // Lock client - let resp = client.deref_mut().txn(txn).await.map_err(|e| Error::Storage(e.to_string()))?; + let outcome = client.deref_mut() + .compare_and_swap(&key, 0, &serialized_wallet) + .await + .map_err(|e| Error::Storage(e.to_string()))?; - if resp.succeeded { // TxnResponse has `succeeded` field + if outcome.success { Ok(wallet) } else { - let existing_wallet: Option = self.get_wallet(&wallet.project_id).await?; - if existing_wallet.is_some() { - Err(Error::WalletAlreadyExists(wallet.project_id)) - } else { - error!("Failed to create wallet for project {}: {:?}", wallet.project_id, resp.error); - Err(Error::Storage(format!("Failed to create wallet: {:?}", resp.error))) - } + // CAS failed - wallet already exists (current_version > 0) + Err(Error::WalletAlreadyExists(wallet.project_id)) } } @@ -122,29 +101,14 @@ impl CreditStorage for ChainFireStorage { let key = Self::wallet_key(&wallet.project_id); let serialized_wallet = Self::serialize(&wallet)?; - // For now, simple put. Proper implementation needs CAS on version field. - let txn = TxnRequest { - compare: vec![], // No compare for simple update - success: vec![kv::RequestOp { - request: Some(kv::request_op::Request::RequestPut(kv::PutRequest { - key: key.clone().into_bytes(), - value: serialized_wallet, - lease: 0, - prev_kv: false, - })), - }], - failure: vec![], - }; - + // Simple put for now. Proper implementation would use CAS on version field. let mut client = self.client.lock().await; // Lock client - let resp = client.deref_mut().txn(txn).await.map_err(|e| Error::Storage(e.to_string()))?; + client.deref_mut() + .put(&key, serialized_wallet) + .await + .map_err(|e| Error::Storage(e.to_string()))?; - if resp.succeeded { // TxnResponse has `succeeded` field - Ok(wallet) - } else { - error!("Failed to update wallet for project {}: {:?}", wallet.project_id, resp.error); - Err(Error::Storage(format!("Failed to update wallet: {:?}", resp.error))) - } + Ok(wallet) } async fn delete_wallet(&self, project_id: &str) -> Result { diff --git a/docs/por/POR.md b/docs/por/POR.md index b59af3b..c6f23d8 100644 --- a/docs/por/POR.md +++ b/docs/por/POR.md @@ -10,8 +10,8 @@ ## Deliverables (top-level) > **Naming (2025-12-11):** Nightlight→NightLight, PrismNET→PrismNET, PlasmaCloud→PhotonCloud -- chainfire - cluster KVS lib - crates/chainfire-* - operational (T053 Cleanup Planned) -- iam (aegis) - IAM platform - iam/crates/* - operational +- chainfire - cluster KVS lib - crates/chainfire-* - TESTS FAIL (DELETE broken, 3/3 integration tests fail) +- iam (aegis) - IAM platform - iam/crates/* - TESTS FAIL (private module visibility issue) - flaredb - DBaaS KVS - flaredb/crates/* - operational - plasmavmc - VM infra - plasmavmc/crates/* - operational (T054 Ops Planned) - lightningstor - object storage - lightningstor/crates/* - operational (T047 Complete, T058 Auth Planned) @@ -21,10 +21,10 @@ - k8shost - K8s hosting (k3s-style) - k8shost/crates/* - operational (T025 MVP complete, T057 Resource Mgmt Planned) - baremetal - Nix bare-metal provisioning - baremetal/* - operational (T032 COMPLETE) - **nightlight** (ex-nightlight) - metrics/observability - nightlight/* - operational (T033 COMPLETE - Item 12 ✓) -- **creditservice** - credit/quota management - creditservice/crates/* - operational (T042 MVP COMPLETE, T052 Persistence PLANNED) +- **creditservice** - credit/quota management - creditservice/crates/* - BROKEN (doesn't compile - missing txn API) ## MVP Milestones -- **MVP-Alpha (ACHIEVED)**: All 12 infrastructure components operational + specs | Status: 100% COMPLETE | 2025-12-12 | T033 Nightlight complete (final component) +- **MVP-Alpha (BLOCKED)**: All 12 infrastructure components operational + specs | Status: BLOCKED - 3 critical failures | 2025-12-12 | Audit found: creditservice doesn't compile, chainfire tests fail, iam tests fail - **MVP-Beta (ACHIEVED)**: E2E tenant path functional + FlareDB metadata unified | Gate: T023 complete ✓ | 2025-12-09 - **MVP-K8s (ACHIEVED)**: K8s hosting with multi-tenant isolation | Gate: T025 S6.1 complete ✓ | 2025-12-09 | IAM auth + PrismNET CNI - MVP-Production (future): HA, monitoring, production hardening | Gate: post-K8s @@ -44,26 +44,20 @@ ## Roadmap (Now/Next/Later) - **Now (<= 2 weeks):** - - **T039 ACTIVE**: Production Deployment (Bare-Metal) — Hardware blocker removed! + - **T059 ACTIVE (P0)**: Critical Audit Fix — creditservice compile, chainfire tests, iam tests (BLOCKS MVP-Alpha) + - **T039 BLOCKED**: Production Deployment — Blocked by T059 - **T058 PLANNED**: LightningSTOR S3 Auth Hardening — Fix SigV4 Auth for Production (P0) - - **T052 PLANNED**: CreditService Persistence — InMemory→ChainFire; Hardening for production (PROJECT.md Item 13) - - **T053 PLANNED**: ChainFire Core Finalization — Remove OpenRaft, finish Gossip, clean debt (From T049 Audit) - - **T054 PLANNED**: PlasmaVMC Ops — Hotplug, Reset, Update, Watch (From T049 Audit) - - **T055 PLANNED**: FiberLB Features — Maglev, L7, BGP (From T049 Audit) - - **T056 PLANNED**: FlashDNS Pagination — Pagination for listing APIs (From T049 Audit) - - **T057 PLANNED**: k8shost Resource Management — IPAM & Tenant-aware Scheduler (From T049 Audit) - - **T051 ACTIVE**: FiberLB Integration — S1-S3 complete; Endpoint discovery implemented (S3); S4 Pending - - **T050 ACTIVE**: REST API — S1 Design complete; S2-S8 Implementation pending - - **T047 COMPLETE**: LightningSTOR S3 Compatibility — S1-S3 complete; AWS CLI working (Auth bypassed for MVP) - - **T049 COMPLETE**: Component Audit — Findings in `docs/por/T049-component-audit/FINDINGS.md` - - **T045 COMPLETE**: Service Integration — S1-S4 done; PlasmaVMC + k8shost CreditService admission control (~763L) - - **T044 COMPLETE**: POR Accuracy Fix — NightLight 43 tests corrected, example fixed, CreditService storage clarified - - **T043 COMPLETE**: Naming Cleanup — All services renamed (Nightlight→NightLight, PrismNET consistent) - - **T042 COMPLETE**: CreditService (MVP) — All 6 steps done; **Storage: InMemory only** (T052 created for persistence) - - **T041 COMPLETE**: ChainFire Cluster Join Fix — OpenRaft放棄→自前Raft実装 - - **T040 COMPLETE**: HA Validation — S1-S5 done; 8/8 Raft tests, HA gaps documented - - **T039 DEFERRED**: Production Deployment (Bare-Metal) — No bare-metal hardware available yet - - **MVP-Alpha STATUS**: 12/12 components operational + CreditService (PROJECT.md Item 13 delivered) + - **T052 BLOCKED**: CreditService Persistence — Blocked by T059.S1 (creditservice must compile first) + - **T053 PLANNED**: ChainFire Core Finalization — Remove OpenRaft, finish Gossip, clean debt + - **T054 PLANNED**: PlasmaVMC Ops — Hotplug, Reset, Update, Watch + - **T055 PLANNED**: FiberLB Features — Maglev, L7, BGP + - **T056 PLANNED**: FlashDNS Pagination — Pagination for listing APIs + - **T057 PLANNED**: k8shost Resource Management — IPAM & Tenant-aware Scheduler + - **T051 ACTIVE**: FiberLB Integration — S1-S3 complete; S4 Pending + - **T050 ACTIVE**: REST API — S1 Design complete; S2-S8 pending + - **T047 COMPLETE**: LightningSTOR S3 Compatibility — AWS CLI working (Auth bypassed) + - **T042 BROKEN**: CreditService (MVP) — Code doesn't compile (missing chainfire_client.txn API) + - **MVP-Alpha STATUS**: BLOCKED — 3 critical failures (creditservice, chainfire, iam) - **Next (2-4 weeks) — Integration & Enhancement:** - **SDK**: gRPCクライアント一貫性 (T048) @@ -93,15 +87,16 @@ - **T036 VM Cluster** ✅ — Infrastructure validated ## Decision & Pivot Log (recent 5) +- 2025-12-12 06:25 | **T059 CREATED — Critical Audit Fix (P0)** | Full code audit confirmed user suspicion of quality issues. 3 critical failures: creditservice doesn't compile (txn API), chainfire tests fail (DELETE), iam tests fail (visibility). MVP-Alpha BLOCKED until fixed. - 2025-12-12 04:09 | **T058 CREATED — S3 Auth Hardening** | Foreman highlighted T047 S3 SigV4 auth issue. Creating T058 (P0) to address this critical security gap for production. - 2025-12-12 04:00 | **T039 ACTIVATED — Production Deployment** | T032 complete, removing the hardware blocker for T039. Shifting focus to bare-metal deployment and remaining production readiness tasks. - 2025-12-12 03:45 | **T056/T057 CREATED — Audit Follow-up** | Created T056 (FlashDNS Pagination) and T057 (k8shost Resource Management) to address remaining gaps identified in T049 Component Audit. - 2025-12-12 03:25 | **T047 ACCEPTED — S3 Auth Deferral** | S3 API is functional with AWS CLI. Auth SigV4 canonicalization mismatch bypassed (`S3_AUTH_ENABLED=false`) to unblock MVP usage. Fix deferred to T039/Security phase. -- 2025-12-12 03:00 | **T055 CREATED — FiberLB Features** | Audit T049 confirmed Maglev/L7/BGP gaps. Created T055 to address PROJECT.md Item 7 requirements explicitly, separate from T051 integration work. ## Active Work > Real-time task status: press T in TUI or run `/task` in IM > Task definitions: docs/por/T###-slug/task.yaml +> **Active: T059 Critical Audit Fix (P0)** — creditservice compile, chainfire tests, iam tests > **Active: T039 Production Deployment (P0)** — Hardware blocker removed! > **Active: T058 LightningSTOR S3 Auth Hardening (P0)** — Planned; awaiting start > **Active: T052 CreditService Persistence (P1)** — Planned; awaiting start @@ -122,6 +117,7 @@ - Falsify before expand; one decidable next step; stop with pride when wrong; Done = evidence. ## Maintenance & Change Log (append-only, one line each) +- 2025-12-12 06:25 | peerA | AUDIT: MVP-Alpha BLOCKED - creditservice doesn't compile (missing txn API), chainfire tests fail (DELETE broken), iam tests fail (visibility); delegated to PeerB - 2025-12-12 04:09 | peerA | T058 CREATED: LightningSTOR S3 Auth Hardening (P0) to address critical SigV4 issue identified in T047, as flagged by Foreman. - 2025-12-12 04:06 | peerA | T053/T056 YAML errors fixed (removed backticks from context/acceptance/notes blocks). - 2025-12-12 04:00 | peerA | T039 ACTIVATED: Hardware blocker removed; shifting focus to production deployment. diff --git a/docs/por/T058-s3-auth-hardening/task.yaml b/docs/por/T058-s3-auth-hardening/task.yaml index 7bdd5b9..da4068d 100644 --- a/docs/por/T058-s3-auth-hardening/task.yaml +++ b/docs/por/T058-s3-auth-hardening/task.yaml @@ -1,7 +1,7 @@ id: T058 name: LightningSTOR S3 Auth Hardening goal: Implement robust SigV4 authentication for LightningSTOR S3 API -status: planned +status: active priority: P0 owner: peerB created: 2025-12-12 @@ -26,14 +26,39 @@ steps: - step: S1 name: Debug SigV4 Canonicalization done: Identify and fix the canonicalization mismatch in SigV4 signature verification. - status: in_progress + status: complete + completed: 2025-12-12 06:15 JST owner: peerB priority: P0 + notes: | + **Root Cause Identified:** + - Used `form_urlencoded::byte_serialize` which follows HTML form encoding rules + - AWS SigV4 requires RFC 3986 URI encoding with specific rules + - Encoding mismatch caused canonical request hash to differ from client's + + **Fix Implemented:** + - Created `aws_uri_encode()` matching RFC 3986 + AWS SigV4 spec exactly + - Unreserved chars (A-Z,a-z,0-9,-,_,.,~) are NOT encoded + - All other chars percent-encoded with uppercase hex (%2F not %2f) + - Preserve slashes in paths, encode in query parameters + - Normalize empty paths to '/' per AWS specification + + **Testing:** + - All 8 auth unit tests pass + - Added comprehensive SigV4 signature determinism test + - Fixed test expectations (body hash, HMAC values) + + **Files Modified:** + - lightningstor/crates/lightningstor-server/src/s3/auth.rs (~40L changes) + + outputs: + - path: lightningstor/crates/lightningstor-server/src/s3/auth.rs + note: SigV4 canonicalization fix - step: S2 name: Integrate with IAM done: Fetch IAM credentials for signature verification. - status: pending + status: in_progress owner: peerB priority: P1 @@ -44,6 +69,14 @@ steps: owner: peerB priority: P1 -evidence: [] +evidence: + - cmd: "cargo test --package lightningstor-server --lib s3::auth::tests" + result: "8 passed; 0 failed" + notes: | Critical for production security of the S3 object storage. Blocking T039 for a truly secure deployment. + + **S1 Complete (2025-12-12 06:15 JST):** + - RFC 3986 compliant URI encoding implemented + - All auth tests passing + - Ready for IAM integration (S2) diff --git a/docs/por/T059-audit-fix/task.yaml b/docs/por/T059-audit-fix/task.yaml new file mode 100644 index 0000000..6c92023 --- /dev/null +++ b/docs/por/T059-audit-fix/task.yaml @@ -0,0 +1,33 @@ +id: T059 +name: Critical Audit Fix +goal: Fix 3 critical failures blocking MVP-Alpha (creditservice compile, chainfire tests, iam tests) +status: active +priority: P0 +assigned: peerB +steps: + - id: S1 + name: Fix creditservice chainfire_storage.rs + done: creditservice compiles (cargo check passes) + status: pending + notes: | + Lines 106, 140 call client.txn() but chainfire_client has no txn method. + Options: (A) add txn method to chainfire_client, or (B) rewrite to use compare_and_swap. + Recommended: Option B - use existing APIs. + - id: S2 + name: Fix chainfire DELETE operation + done: chainfire integration tests pass (3/3) + status: pending + notes: | + Integration tests fail at integration_test.rs:91 - assertion failed: deleted. + Debug KvService.delete() implementation. + - id: S3 + name: Fix iam module visibility + done: iam tests pass (tenant_path_integration) + status: pending + notes: | + iam_service module is private but tests import it. + Change to pub mod or re-export needed types. + - id: S4 + name: Full test suite verification + done: All 11 workspaces compile AND tests pass + status: pending diff --git a/docs/por/scope.yaml b/docs/por/scope.yaml index 1994423..8ee9c77 100644 --- a/docs/por/scope.yaml +++ b/docs/por/scope.yaml @@ -1,5 +1,5 @@ version: '1.0' -updated: '2025-12-12T06:23:46.355440' +updated: '2025-12-12T06:31:19.232544' tasks: - T001 - T002 @@ -59,3 +59,4 @@ tasks: - T056 - T057 - T058 +- T059