diff --git a/apps/backend/src/data-retention/data-retention.service.spec.ts b/apps/backend/src/data-retention/data-retention.service.spec.ts index f2fd991e..933c5cbe 100644 --- a/apps/backend/src/data-retention/data-retention.service.spec.ts +++ b/apps/backend/src/data-retention/data-retention.service.spec.ts @@ -337,6 +337,7 @@ describe("DataRetentionService", () => { // confirmedTotalSuccess = 100 - 10 = 90 expect(counterMock.labels).toHaveBeenCalledWith({ checkType: "dataRetention", + network: "calibration", providerId: "1", providerName: "Provider A", providerStatus: "approved", @@ -344,6 +345,7 @@ describe("DataRetentionService", () => { }); expect(counterMock.labels).toHaveBeenCalledWith({ checkType: "dataRetention", + network: "calibration", providerId: "1", providerName: "Provider A", providerStatus: "approved", @@ -380,6 +382,7 @@ describe("DataRetentionService", () => { // confirmedTotalSuccess = 100 - 10 = 90 expect(counterMock.labels).toHaveBeenCalledWith({ checkType: "dataRetention", + network: "calibration", providerId: "1", providerName: "Provider A", providerStatus: "approved", @@ -387,6 +390,7 @@ describe("DataRetentionService", () => { }); expect(counterMock.labels).toHaveBeenCalledWith({ checkType: "dataRetention", + network: "calibration", providerId: "1", providerName: "Provider A", providerStatus: "approved", @@ -621,12 +625,14 @@ describe("DataRetentionService", () => { providerId: 1n, providerName: "Provider A", providerIsApproved: true, + network: "calibration", }); const unapprovedLabels = buildCheckMetricLabels({ checkType: "dataRetention", providerId: 1n, providerName: "Provider A", providerIsApproved: false, + network: "calibration", }); expect(counterMock.remove).toHaveBeenCalledWith({ ...approvedLabels, value: "success" }); expect(counterMock.remove).toHaveBeenCalledWith({ ...approvedLabels, value: "failure" }); @@ -1215,12 +1221,14 @@ describe("DataRetentionService", () => { providerId: 1n, providerName: "Provider A", providerIsApproved: true, + network: "calibration", }); const unapprovedLabels = buildCheckMetricLabels({ checkType: "dataRetention", providerId: 1n, providerName: "Provider A", providerIsApproved: false, + network: "calibration", }); expect(gaugeMock.remove).toHaveBeenCalledWith(approvedLabels); expect(gaugeMock.remove).toHaveBeenCalledWith(unapprovedLabels); diff --git a/apps/backend/src/data-retention/data-retention.service.ts b/apps/backend/src/data-retention/data-retention.service.ts index 3b0094c9..59e01a8a 100644 --- a/apps/backend/src/data-retention/data-retention.service.ts +++ b/apps/backend/src/data-retention/data-retention.service.ts @@ -8,7 +8,7 @@ import { ClickhouseService } from "../clickhouse/clickhouse.service.js"; import { toStructuredError } from "../common/logging.js"; import { isSpBlocked } from "../common/sp-blocklist.js"; import type { Network } from "../common/types.js"; -import { IBlockchainConfig, IConfig } from "../config/app.config.js"; +import { IConfig } from "../config/app.config.js"; import { DataRetentionBaseline } from "../database/entities/data-retention-baseline.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { buildCheckMetricLabels, CheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; @@ -60,8 +60,7 @@ export class DataRetentionService { * challenge delta since the last poll. */ async pollDataRetention(): Promise { - const blockchainCfg = this.configService.get("blockchain"); - const { network, pdpSubgraphEndpoint } = blockchainCfg; + const { network, pdpSubgraphEndpoint } = this.configService.get("blockchain", { infer: true }); if (!pdpSubgraphEndpoint) { this.logger.warn({ event: "pdp_subgraph_endpoint_not_configured", @@ -266,12 +265,14 @@ export class DataRetentionService { if (provider && provider.providerId != null) { const approvedLabels = buildCheckMetricLabels({ + network, checkType: "dataRetention", providerId: provider.providerId, providerName: provider.name, providerIsApproved: true, }); const unapprovedLabels = buildCheckMetricLabels({ + network, checkType: "dataRetention", providerId: provider.providerId, providerName: provider.name, @@ -375,11 +376,13 @@ export class DataRetentionService { successPeriods: confirmedTotalSuccess, }; + const network = this.configService.get("blockchain", { infer: true }).network; const providerLabels = buildCheckMetricLabels({ checkType: "dataRetention", providerId: pdpProvider.id, providerName: pdpProvider.name, providerIsApproved: pdpProvider.isApproved, + network, }); // Emit overdue periods gauge on every poll — this is a separate signal from the diff --git a/apps/backend/src/deal-addons/strategies/ipni.strategy.spec.ts b/apps/backend/src/deal-addons/strategies/ipni.strategy.spec.ts index d81da8d9..0d0ea677 100644 --- a/apps/backend/src/deal-addons/strategies/ipni.strategy.spec.ts +++ b/apps/backend/src/deal-addons/strategies/ipni.strategy.spec.ts @@ -1,6 +1,7 @@ import { CID } from "multiformats/cid"; import type { Mock } from "vitest"; import { describe, expect, it, vi } from "vitest"; +import type { Network } from "../../common/types.js"; import { Deal } from "../../database/entities/deal.entity.js"; import { StorageProvider } from "../../database/entities/storage-provider.entity.js"; import { IpniStatus, ServiceType } from "../../database/types.js"; @@ -11,6 +12,7 @@ import { IpniAddonStrategy } from "./ipni.strategy.js"; describe("IpniAddonStrategy getPieceStatus", () => { type DealForMetrics = { + network: Network; spAddress?: string; storageProvider?: { providerId?: bigint; @@ -44,6 +46,7 @@ describe("IpniAddonStrategy getPieceStatus", () => { Object.assign(new Deal(), { id: "deal-1", spAddress: "0xsp", + network: "calibration", fileName: "file", fileSize: 1, walletAddress: "0xwallet", @@ -68,6 +71,7 @@ describe("IpniAddonStrategy getPieceStatus", () => { if (!deal?.spAddress) return null; return buildCheckMetricLabels({ checkType: "dataStorage", + network: deal.network, providerId: deal.storageProvider?.providerId, providerName: deal.storageProvider?.name, providerIsApproved: deal.storageProvider?.isApproved, @@ -254,6 +258,7 @@ describe("IpniAddonStrategy getPieceStatus", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "9", providerName: "SP", providerStatus: "approved", @@ -343,6 +348,7 @@ describe("IpniAddonStrategy getPieceStatus", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "9", providerName: "SP", providerStatus: "approved", @@ -408,7 +414,13 @@ describe("IpniAddonStrategy getPieceStatus", () => { 2000, ); - const labels = { checkType: "dataStorage", providerId: "9", providerName: "SP", providerStatus: "approved" }; + const labels = { + checkType: "dataStorage", + network: "calibration", + providerId: "9", + providerName: "SP", + providerStatus: "approved", + }; expect(discoverabilityMetrics.observeIpniVerifyMs).toHaveBeenCalledWith(labels, 500, "error"); }); @@ -532,7 +544,13 @@ describe("IpniAddonStrategy getPieceStatus", () => { await expect(strategyForTest.startIpniMonitoring(deal)).resolves.toBeUndefined(); - const labels = { checkType: "dataStorage", providerId: "9", providerName: "SP", providerStatus: "approved" }; + const labels = { + checkType: "dataStorage", + network: "calibration", + providerId: "9", + providerName: "SP", + providerStatus: "approved", + }; const statusCalls = (discoverabilityMetrics.recordStatus as Mock).mock.calls.filter( ([, value]: [unknown, string]) => value.startsWith("failure.") || value === "skipped" || value === "success", ); @@ -571,6 +589,7 @@ describe("IpniAddonStrategy getPieceStatus", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "9", providerName: "SP", providerStatus: "approved", @@ -610,6 +629,7 @@ describe("IpniAddonStrategy getPieceStatus", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "9", providerName: "SP", providerStatus: "approved", @@ -650,6 +670,7 @@ describe("IpniAddonStrategy getPieceStatus", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "9", providerName: "SP", providerStatus: "approved", diff --git a/apps/backend/src/deal/deal.service.spec.ts b/apps/backend/src/deal/deal.service.spec.ts index 0672a7a2..156643d5 100644 --- a/apps/backend/src/deal/deal.service.spec.ts +++ b/apps/backend/src/deal/deal.service.spec.ts @@ -392,6 +392,7 @@ describe("DealService", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "42", providerName: "Test Provider", providerStatus: "approved", @@ -588,6 +589,7 @@ describe("DealService", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "7", providerName: "Test Provider", providerStatus: "unapproved", @@ -622,6 +624,7 @@ describe("DealService", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "7", providerName: "Test Provider", providerStatus: "unapproved", @@ -911,6 +914,7 @@ describe("DealService", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "42", providerName: "Test Provider", providerStatus: "approved", @@ -960,6 +964,7 @@ describe("DealService", () => { const labels = { checkType: "dataStorage", + network: "calibration", providerId: "42", providerName: "Test Provider", providerStatus: "approved", diff --git a/apps/backend/src/deal/deal.service.ts b/apps/backend/src/deal/deal.service.ts index ac137e98..5bb91bf9 100644 --- a/apps/backend/src/deal/deal.service.ts +++ b/apps/backend/src/deal/deal.service.ts @@ -267,10 +267,12 @@ export class DealService implements OnModuleInit, OnModuleDestroy { extraDataSetMetadata?: Record, logContext?: ProviderJobContext, ): Promise { + const network = this.blockchainConfig.network; const providerAddress = pdpProvider.serviceProvider; const checkType = "dataStorage" as const; let providerLabels = buildCheckMetricLabels({ checkType, + network, providerId: pdpProvider.id, providerName: pdpProvider.name, providerIsApproved: pdpProvider.isApproved, @@ -312,7 +314,7 @@ export class DealService implements OnModuleInit, OnModuleDestroy { deal.fileName = dealInput.processedData.name; deal.fileSize = dealInput.processedData.size; deal.spAddress = providerAddress; - deal.network = this.blockchainConfig.network; + deal.network = network; deal.status = DealStatus.PENDING; deal.walletAddress = this.blockchainConfig.walletAddress; deal.metadata = dealInput.metadata; @@ -343,6 +345,7 @@ export class DealService implements OnModuleInit, OnModuleDestroy { dealLogContext.providerId = deal.storageProvider?.providerId ?? dealLogContext.providerId; providerLabels = buildCheckMetricLabels({ checkType, + network, providerId: deal.storageProvider?.providerId, providerName: pdpProvider.name ?? deal.storageProvider?.name, providerIsApproved: pdpProvider.isApproved ?? deal.storageProvider?.isApproved, @@ -865,6 +868,7 @@ export class DealService implements OnModuleInit, OnModuleDestroy { } const labels = buildCheckMetricLabels({ checkType: "dataSetCreation", + network: this.blockchainConfig.network, providerId: providerInfo.id, providerName: providerInfo.name, providerIsApproved: providerInfo.isApproved, diff --git a/apps/backend/src/jobs/jobs.service.spec.ts b/apps/backend/src/jobs/jobs.service.spec.ts index b3fc8741..b75a5131 100644 --- a/apps/backend/src/jobs/jobs.service.spec.ts +++ b/apps/backend/src/jobs/jobs.service.spec.ts @@ -224,9 +224,13 @@ describe("JobsService schedule rows", () => { await callPrivate(service, "recordJobExecution", "deal", run); expect(run).toHaveBeenCalled(); - expect(startedCounter.inc).toHaveBeenCalledWith({ job_type: "deal" }); - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", handler_result: "success" }); - expect(durationHistogram.observe).toHaveBeenCalledWith({ job_type: "deal" }, 5); + expect(startedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: "deal", + handler_result: "success", + network: "calibration", + }); + expect(durationHistogram.observe).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 5); }); it("records metrics for failed job execution", async () => { @@ -245,9 +249,13 @@ describe("JobsService schedule rows", () => { await expect(callPrivate(service, "recordJobExecution", "deal", run)).rejects.toThrow("boom"); - expect(startedCounter.inc).toHaveBeenCalledWith({ job_type: "deal" }); - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", handler_result: "error" }); - expect(durationHistogram.observe).toHaveBeenCalledWith({ job_type: "deal" }, 2); + expect(startedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: "deal", + handler_result: "error", + network: "calibration", + }); + expect(durationHistogram.observe).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 2); }); it("records metrics for aborted job execution", async () => { @@ -266,9 +274,13 @@ describe("JobsService schedule rows", () => { await callPrivate(service, "recordJobExecution", "deal", run); - expect(startedCounter.inc).toHaveBeenCalledWith({ job_type: "deal" }); - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", handler_result: "aborted" }); - expect(durationHistogram.observe).toHaveBeenCalledWith({ job_type: "deal" }, 3); + expect(startedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: "deal", + handler_result: "aborted", + network: "calibration", + }); + expect(durationHistogram.observe).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 3); }); it("deal job records aborted when abort signal fires", async () => { @@ -329,7 +341,11 @@ describe("JobsService schedule rows", () => { await vi.advanceTimersByTimeAsync(120_000); await jobPromise; - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", handler_result: "aborted" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: "deal", + handler_result: "aborted", + network: "calibration", + }); }); it("retrieval job records aborted when abort signal fires", async () => { @@ -387,7 +403,11 @@ describe("JobsService schedule rows", () => { await vi.advanceTimersByTimeAsync(60_000); await jobPromise; - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: "retrieval", handler_result: "aborted" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: "retrieval", + handler_result: "aborted", + network: "calibration", + }); }); it("retrieval job resolves providerId from storage_providers when wallet cache misses", async () => { @@ -464,7 +484,11 @@ describe("JobsService schedule rows", () => { ).rejects.toThrow("providerId is required for job execution"); expect(retrievalService.performRandomRetrievalForProvider).not.toHaveBeenCalled(); - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: "retrieval", handler_result: "error" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: "retrieval", + handler_result: "error", + network: "calibration", + }); }); it("updates queue metrics from pg-boss state and age queries", async () => { @@ -485,15 +509,15 @@ describe("JobsService schedule rows", () => { await callPrivate(service, "updateQueueMetrics"); - expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "deal" }, 0); - expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "retrieval" }, 0); - expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "data_retention_poll" }, 0); - expect(jobsInFlightGauge.set).toHaveBeenCalledWith({ job_type: "retrieval" }, 1); - expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "deal" }, 2); + expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 0); + expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "retrieval", network: "calibration" }, 0); + expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "data_retention_poll", network: "calibration" }, 0); + expect(jobsInFlightGauge.set).toHaveBeenCalledWith({ job_type: "retrieval", network: "calibration" }, 1); + expect(jobsQueuedGauge.set).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 2); - expect(oldestQueuedGauge.set).toHaveBeenCalledWith({ job_type: "deal" }, 12); - expect(oldestInFlightGauge.set).toHaveBeenCalledWith({ job_type: "retrieval" }, 34); - expect(jobsPausedGauge.set).toHaveBeenCalledWith({ job_type: "deal" }, 0); + expect(oldestQueuedGauge.set).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 12); + expect(oldestInFlightGauge.set).toHaveBeenCalledWith({ job_type: "retrieval", network: "calibration" }, 34); + expect(jobsPausedGauge.set).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 0); }); it("registers pg-boss workers with per-queue batch sizes", async () => { @@ -623,7 +647,7 @@ describe("JobsService schedule rows", () => { await callPrivate(service, "updateQueueMetrics"); - expect(jobsPausedGauge.set).toHaveBeenCalledWith({ job_type: "deal" }, 2); + expect(jobsPausedGauge.set).toHaveBeenCalledWith({ job_type: "deal", network: "calibration" }, 2); }); it("adds schedule rows for newly seen providers", async () => { @@ -1046,7 +1070,11 @@ describe("JobsService schedule rows", () => { }); expect(dealService.createDealForProvider).toHaveBeenCalledTimes(1); - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: "deal", handler_result: "error" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: "deal", + handler_result: "error", + network: "calibration", + }); }); it("data_set_creation job creates initial data set when minNumDataSetsForChecks is 1", async () => { @@ -1276,9 +1304,9 @@ describe("JobsService schedule rows", () => { await callPrivate(service, "updateStorageProviderGauges"); - expect(activeGauge.set).toHaveBeenCalledWith({ status: "active" }, 7); - expect(activeGauge.set).toHaveBeenCalledWith({ status: "inactive" }, 3); - expect(testedGauge.set).toHaveBeenCalledWith(7); + expect(activeGauge.set).toHaveBeenCalledWith({ status: "active", network: "calibration" }, 7); + expect(activeGauge.set).toHaveBeenCalledWith({ status: "inactive", network: "calibration" }, 3); + expect(testedGauge.set).toHaveBeenCalledWith({ network: "calibration" }, 7); }); it("filters tested providers by isApproved when useOnlyApprovedProviders is enabled", async () => { @@ -1322,7 +1350,7 @@ describe("JobsService schedule rows", () => { await callPrivate(service, "updateStorageProviderGauges"); - expect(testedGauge.set).toHaveBeenCalledWith(2); // 3 providers minus 1 globally blocked + expect(testedGauge.set).toHaveBeenCalledWith({ network: "calibration" }, 2); // 3 providers minus 1 globally blocked }); it("catches storage provider gauge errors without rethrowing", async () => { @@ -1500,7 +1528,11 @@ describe("JobsService schedule rows", () => { }); testCase.expectCheckNotRun(); - expect(completedCounter.inc).toHaveBeenCalledWith({ job_type: testCase.jobType, handler_result: "success" }); + expect(completedCounter.inc).toHaveBeenCalledWith({ + job_type: testCase.jobType, + handler_result: "success", + network: "calibration", + }); } expect(storageProviderRepositoryMock.findOne).not.toHaveBeenCalled(); diff --git a/apps/backend/src/jobs/jobs.service.ts b/apps/backend/src/jobs/jobs.service.ts index 16c04d5a..1b4866d5 100644 --- a/apps/backend/src/jobs/jobs.service.ts +++ b/apps/backend/src/jobs/jobs.service.ts @@ -818,10 +818,11 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const activeCount = await this.storageProviderRepository.count({ where: { isActive: true } }); const inactiveCount = Math.max(0, totalProviders - activeCount); - this.storageProvidersActive.set({ status: "active" }, activeCount); - this.storageProvidersActive.set({ status: "inactive" }, inactiveCount); + const { network, useOnlyApprovedProviders } = this.configService.get("blockchain", { infer: true }); + + this.storageProvidersActive.set({ status: "active", network }, activeCount); + this.storageProvidersActive.set({ status: "inactive", network }, inactiveCount); - const useOnlyApprovedProviders = this.configService.get("blockchain").useOnlyApprovedProviders; const testedWhere = useOnlyApprovedProviders ? { isActive: true, isApproved: true } : { isActive: true }; const spBlocklists = this.configService.get("spBlocklists"); const hasGlobalBlocklist = spBlocklists.addresses.size > 0 || spBlocklists.ids.size > 0; @@ -835,7 +836,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { } else { testedCount = await this.storageProviderRepository.count({ where: testedWhere }); } - this.storageProvidersTested.set(testedCount); + this.storageProvidersTested.set({ network }, testedCount); } catch (error) { this.logger.warn({ event: "update_storage_provider_metrics_failed", @@ -1331,7 +1332,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { finalOptions.singletonKey = `${data.network}:${jobType}`; } await this.boss.send(name, data, finalOptions); - this.jobsEnqueueAttemptsCounter.inc({ job_type: jobType, outcome: "success" }); + this.jobsEnqueueAttemptsCounter.inc({ job_type: jobType, outcome: "success", network: data.network }); return true; } catch (error) { this.logger.warn({ @@ -1341,7 +1342,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { jobType, error: toStructuredError(error), }); - this.jobsEnqueueAttemptsCounter.inc({ job_type: jobType, outcome: "error" }); + this.jobsEnqueueAttemptsCounter.inc({ job_type: jobType, outcome: "error", network: data.network }); return false; } } @@ -1350,17 +1351,18 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { * Records handler start/end metrics around a job execution. */ private async recordJobExecution(jobType: JobType, run: () => Promise): Promise { + const network = this.configService.get("blockchain", { infer: true }).network; const startedAt = Date.now(); - this.jobsStartedCounter.inc({ job_type: jobType }); + this.jobsStartedCounter.inc({ job_type: jobType, network }); try { const status = await run(); const finishedAt = Date.now(); - this.jobDuration.observe({ job_type: jobType }, (finishedAt - startedAt) / 1000); - this.jobsCompletedCounter.inc({ job_type: jobType, handler_result: status }); + this.jobDuration.observe({ job_type: jobType, network }, (finishedAt - startedAt) / 1000); + this.jobsCompletedCounter.inc({ job_type: jobType, handler_result: status, network }); } catch (error) { const finishedAt = Date.now(); - this.jobDuration.observe({ job_type: jobType }, (finishedAt - startedAt) / 1000); - this.jobsCompletedCounter.inc({ job_type: jobType, handler_result: "error" }); + this.jobDuration.observe({ job_type: jobType, network }, (finishedAt - startedAt) / 1000); + this.jobsCompletedCounter.inc({ job_type: jobType, handler_result: "error", network }); throw error; } } @@ -1381,12 +1383,12 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { "pull_piece_cleanup", ]; for (const jobType of jobTypes) { - this.jobsQueuedGauge.set({ job_type: jobType }, 0); - this.jobsRetryScheduledGauge.set({ job_type: jobType }, 0); - this.jobsInFlightGauge.set({ job_type: jobType }, 0); - this.jobsPausedGauge.set({ job_type: jobType }, 0); - this.oldestQueuedAgeGauge.set({ job_type: jobType }, 0); - this.oldestInFlightAgeGauge.set({ job_type: jobType }, 0); + this.jobsQueuedGauge.set({ job_type: jobType, network }, 0); + this.jobsRetryScheduledGauge.set({ job_type: jobType, network }, 0); + this.jobsInFlightGauge.set({ job_type: jobType, network }, 0); + this.jobsPausedGauge.set({ job_type: jobType, network }, 0); + this.oldestQueuedAgeGauge.set({ job_type: jobType, network }, 0); + this.oldestInFlightAgeGauge.set({ job_type: jobType, network }, 0); } const rows = await this.jobScheduleRepository.countBossJobStates(["created", "retry", "active"], network); @@ -1396,11 +1398,11 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { if (!jobTypes.includes(jobType)) continue; const state = String(row.state).toLowerCase(); if (state === "active") { - this.jobsInFlightGauge.set({ job_type: jobType }, row.count); + this.jobsInFlightGauge.set({ job_type: jobType, network }, row.count); } else if (state === "retry") { - this.jobsRetryScheduledGauge.set({ job_type: jobType }, row.count); + this.jobsRetryScheduledGauge.set({ job_type: jobType, network }, row.count); } else { - this.jobsQueuedGauge.set({ job_type: jobType }, row.count); + this.jobsQueuedGauge.set({ job_type: jobType, network }, row.count); } } } else { @@ -1413,7 +1415,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const pausedSchedules = await this.jobScheduleRepository.countPausedSchedules(network); for (const row of pausedSchedules) { - this.jobsPausedGauge.set({ job_type: row.job_type }, row.count); + this.jobsPausedGauge.set({ job_type: row.job_type, network }, row.count); } const now = new Date(); @@ -1421,14 +1423,14 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { for (const row of queuedAges) { const jobType = row.job_type as JobType; if (!jobTypes.includes(jobType)) continue; - this.oldestQueuedAgeGauge.set({ job_type: jobType }, Math.max(0, row.min_age_seconds ?? 0)); + this.oldestQueuedAgeGauge.set({ job_type: jobType, network }, Math.max(0, row.min_age_seconds ?? 0)); } const activeAges = await this.jobScheduleRepository.minBossJobAgeSecondsByState("active", now, network); for (const row of activeAges) { const jobType = row.job_type as JobType; if (!jobTypes.includes(jobType)) continue; - this.oldestInFlightAgeGauge.set({ job_type: jobType }, Math.max(0, row.min_age_seconds ?? 0)); + this.oldestInFlightAgeGauge.set({ job_type: jobType, network }, Math.max(0, row.min_age_seconds ?? 0)); } } } diff --git a/apps/backend/src/metrics-prometheus/check-metric-labels.ts b/apps/backend/src/metrics-prometheus/check-metric-labels.ts index 07415d45..72cbf31a 100644 --- a/apps/backend/src/metrics-prometheus/check-metric-labels.ts +++ b/apps/backend/src/metrics-prometheus/check-metric-labels.ts @@ -1,3 +1,5 @@ +import type { Network } from "../common/types.js"; + export type CheckType = "dataStorage" | "retrieval" | "dataRetention" | "dataSetCreation" | "pullCheck"; export type ProviderStatus = "approved" | "unapproved"; @@ -6,10 +8,12 @@ export type CheckMetricLabels = { providerId: string; providerName: string; providerStatus: ProviderStatus; + network: Network; }; export type CheckMetricLabelInput = { checkType: CheckType; + network: Network; providerId?: bigint | null; providerName?: string | null; providerIsApproved?: boolean | null; @@ -17,6 +21,7 @@ export type CheckMetricLabelInput = { export const buildCheckMetricLabels = ({ checkType, + network, providerId, providerName, providerIsApproved, @@ -26,6 +31,7 @@ export const buildCheckMetricLabels = ({ return { checkType, + network, providerId: normalizedProviderId, providerName: providerName ?? "unknown", providerStatus, diff --git a/apps/backend/src/metrics-prometheus/check-metrics.service.ts b/apps/backend/src/metrics-prometheus/check-metrics.service.ts index 32c1aa1c..c35ca21f 100644 --- a/apps/backend/src/metrics-prometheus/check-metrics.service.ts +++ b/apps/backend/src/metrics-prometheus/check-metrics.service.ts @@ -243,6 +243,7 @@ export class DiscoverabilityCheckMetrics { if (!deal.spAddress) return null; return buildCheckMetricLabels({ checkType: "dataStorage", + network: deal.network, providerId: deal.storageProvider?.providerId, providerName: deal.storageProvider?.name, providerIsApproved: deal.storageProvider?.isApproved, diff --git a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts index beed4d6f..ae85c5dd 100644 --- a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts +++ b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts @@ -60,98 +60,98 @@ const metricProviders = [ // docs/checks/events-and-metrics.md#ingestMs name: "ingestMs", help: "Time to upload a piece to a storage provider (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [10, 50, 100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#ingestThroughputBps name: "ingestThroughputBps", help: "Ingest throughput in bytes per second", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: throughputBuckets, }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#pieceAddedOnChainMs name: "pieceAddedOnChainMs", help: "Time from upload end to piece added on-chain (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [10, 50, 100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#pieceConfirmedOnChainMs name: "pieceConfirmedOnChainMs", help: "Time from piece added to piece confirmed on-chain (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [10, 50, 100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#spIndexLocallyMs name: "spIndexLocallyMs", help: "Time from upload end to SP indexing locally (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [10, 50, 100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#spAnnounceAdvertisementMs name: "spAnnounceAdvertisementMs", help: "Time from upload end to SP advertisement to IPNI (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [10, 50, 100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#ipniVerifyMs name: "ipniVerifyMs", help: "IPNI verification duration (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, buckets: [10, 50, 100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 90000, 120000, 180000, 240000, 300000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#ipfsRetrievalFirstByteMs name: "ipfsRetrievalFirstByteMs", help: "Time to first byte for IPFS retrievals (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [1, 5, 10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#ipfsRetrievalBlockFirstByteMs name: "ipfsRetrievalBlockFirstByteMs", help: "Time to first byte for individual IPFS block fetches (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [1, 5, 10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#ipfsRetrievalLastByteMs name: "ipfsRetrievalLastByteMs", help: "Time to last byte for IPFS retrievals (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [1, 5, 10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#ipfsRetrievalThroughputBps name: "ipfsRetrievalThroughputBps", help: "IPFS retrieval throughput in bytes per second", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: throughputBuckets, }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#dataStorageCheckMs name: "dataStorageCheckMs", help: "End-to-end data storage check duration (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000, 600000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#retrievalCheckMs name: "retrievalCheckMs", help: "End-to-end retrieval check duration (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000, 600000], }), makeHistogramProvider({ // docs/checks/events-and-metrics.md#dataSetCreationMs name: "dataSetCreationMs", help: "End-to-end data-set creation upload duration (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000, 600000], }), // Sub-status metrics (docs/checks/data-storage.md) @@ -159,105 +159,106 @@ const metricProviders = [ // docs/checks/data-storage.md#sub-status-meanings (Upload Status) name: "dataStorageUploadStatus", help: "Data storage upload sub-status counts", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeCounterProvider({ // docs/checks/data-storage.md#sub-status-meanings (Onchain Status) name: "dataStorageOnchainStatus", help: "Data storage onchain sub-status counts", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeCounterProvider({ // docs/checks/data-storage.md#deal-status-progression (Overall Status) name: "dataStorageStatus", help: "Data storage check overall status counts (success when all sub-statuses succeed, failure.timedout/failure.other otherwise)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeCounterProvider({ // docs/checks/data-storage.md#sub-status-meanings (Discoverability Status) name: "discoverabilityStatus", help: "Discoverability sub-status counts", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeCounterProvider({ // docs/checks/data-storage.md#sub-status-meanings (Retrieval Status) name: "retrievalStatus", help: "Retrieval sub-status counts", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeCounterProvider({ // docs/checks/events-and-metrics.md#ipfsRetrievalHttpResponseCode name: "ipfsRetrievalHttpResponseCode", help: "HTTP response codes for IPFS retrievals", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeCounterProvider({ // docs/checks/events-and-metrics.md#dataSetCreationStatus name: "dataSetCreationStatus", help: "Data-set creation status counts", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), // Pull check metrics (docs/checks/pull-check.md) makeHistogramProvider({ name: "pullRequestAcknowledgementLatencyMs", help: "Time from pull request submission to SP request acknowledgement (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [10, 50, 100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], }), makeHistogramProvider({ name: "pullRequestStartedMs", help: "Time from pullPieces submission to the SP reading the first byte of the hosted-piece stream (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000], }), makeHistogramProvider({ name: "pullRequestCompletionLatencyMs", help: "Time from pull request submission to terminal SP pull status (ms)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: [100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000, 600000], }), makeCounterProvider({ name: "pullRequestProviderStatus", help: "Terminal SP-reported pull status recorded once per check (intermediate polling statuses are not counted)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeHistogramProvider({ name: "pullRequestThroughputBps", help: "Pull-check throughput approximated as pieceSize / completionLatency in bytes per second", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, buckets: throughputBuckets, }), makeCounterProvider({ name: "pullCheckStatus", help: "Pull-check terminal status counts (success | failure.timedout | failure.other)", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), // Data Retention Metrics makeCounterProvider({ name: "dataSetChallengeStatus", help: "Provider dataset challenge status", - labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value", "network"] as const, }), makeGaugeProvider({ name: "pdp_provider_estimated_overdue_periods", help: "Estimated number of unrecorded overdue proving periods per provider. Resets to 0 when the subgraph catches up.", - labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "network"] as const, }), // Storage provider metrics: absolute counts, independent of query filters. makeGaugeProvider({ name: "storage_providers_active", help: "Number of active storage providers", - labelNames: ["status"] as const, + labelNames: ["status", "network"] as const, }), makeGaugeProvider({ name: "storage_providers_tested", help: "Number of storage providers being tested", + labelNames: ["network"] as const, }), // Wallet metrics: balances in base units as returned by chain services. makeGaugeProvider({ name: "wallet_balance", help: "Wallet balance in base units (per currency)", - labelNames: ["currency", "wallet"] as const, + labelNames: ["currency", "wallet", "network"] as const, }), // Job scheduler metrics (pg-boss) /** @@ -266,7 +267,7 @@ const metricProviders = [ makeGaugeProvider({ name: "jobs_queued", help: "Number of queued jobs (pg-boss state: created)", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, }), /** * Jobs scheduled for retry per type (pg-boss state: retry). @@ -274,7 +275,7 @@ const metricProviders = [ makeGaugeProvider({ name: "jobs_retry_scheduled", help: "Number of jobs in retry state (pg-boss state: retry)", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, }), /** * Oldest queued job age per type (seconds). @@ -282,7 +283,7 @@ const metricProviders = [ makeGaugeProvider({ name: "oldest_queued_age_seconds", help: "Age in seconds of the oldest queued job (pg-boss state: created)", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, }), /** * Oldest in-flight job age per type (seconds). @@ -290,7 +291,7 @@ const metricProviders = [ makeGaugeProvider({ name: "oldest_in_flight_age_seconds", help: "Age in seconds of the oldest active job (pg-boss state: active)", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, }), /** * Currently executing jobs per type (pg-boss state: active). @@ -298,7 +299,7 @@ const metricProviders = [ makeGaugeProvider({ name: "jobs_in_flight", help: "Number of active jobs currently executing", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, }), /** * Manually paused jobs per type (paused = true in job_schedule_state). @@ -306,7 +307,7 @@ const metricProviders = [ makeGaugeProvider({ name: "jobs_paused", help: "Number of manually paused jobs in job_schedule_state", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, }), /** * Enqueue attempts per type (success/error). @@ -314,7 +315,7 @@ const metricProviders = [ makeCounterProvider({ name: "jobs_enqueue_attempts_total", help: "Total number of enqueue attempts", - labelNames: ["job_type", "outcome"] as const, + labelNames: ["job_type", "outcome", "network"] as const, }), /** * Jobs started by handlers per type. @@ -322,7 +323,7 @@ const metricProviders = [ makeCounterProvider({ name: "jobs_started_total", help: "Total number of jobs started", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, }), /** * Handler completion results per type. @@ -335,7 +336,7 @@ const metricProviders = [ makeCounterProvider({ name: "jobs_completed_total", help: "Total number of jobs completed", - labelNames: ["job_type", "handler_result"] as const, + labelNames: ["job_type", "handler_result", "network"] as const, }), /** * Handler execution duration per type (seconds). @@ -343,7 +344,7 @@ const metricProviders = [ makeHistogramProvider({ name: "job_duration_seconds", help: "Job execution duration in seconds", - labelNames: ["job_type"] as const, + labelNames: ["job_type", "network"] as const, buckets: [0.1, 0.5, 1, 2, 3, 4, 5, 10, 15, 20, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 420, 600], }), ]; diff --git a/apps/backend/src/metrics-prometheus/wallet-balance.collector.ts b/apps/backend/src/metrics-prometheus/wallet-balance.collector.ts index c804ac15..be01ab15 100644 --- a/apps/backend/src/metrics-prometheus/wallet-balance.collector.ts +++ b/apps/backend/src/metrics-prometheus/wallet-balance.collector.ts @@ -43,9 +43,10 @@ export class WalletBalanceCollector implements OnModuleInit { this.refreshPromise = (async () => { try { const { usdfc, fil } = await this.walletSdkService.getWalletBalances(); - const walletShort = this.configService.get("blockchain").walletAddress.slice(0, 8); - this.walletBalanceGauge.set({ currency: "USDFC", wallet: walletShort }, Number(usdfc)); - this.walletBalanceGauge.set({ currency: "FIL", wallet: walletShort }, Number(fil)); + const { network, walletAddress } = this.configService.get("blockchain", { infer: true }); + const walletShort = walletAddress.slice(0, 8); + this.walletBalanceGauge.set({ currency: "USDFC", wallet: walletShort, network }, Number(usdfc)); + this.walletBalanceGauge.set({ currency: "FIL", wallet: walletShort, network }, Number(fil)); this.cachedAt = Date.now(); } catch (error) { this.logger.warn({ diff --git a/apps/backend/src/pull-check/pull-check.service.ts b/apps/backend/src/pull-check/pull-check.service.ts index ff76a264..887a9f5b 100644 --- a/apps/backend/src/pull-check/pull-check.service.ts +++ b/apps/backend/src/pull-check/pull-check.service.ts @@ -68,6 +68,7 @@ export class PullCheckService { const providerInfo = this.validateProviderInfo(spAddress); const labels = buildCheckMetricLabels({ checkType: "pullCheck", + network: this.configService.get("blockchain", { infer: true }).network, providerId: providerInfo.id, providerName: providerInfo.name, providerIsApproved: providerInfo.isApproved, diff --git a/apps/backend/src/retrieval/retrieval.service.ts b/apps/backend/src/retrieval/retrieval.service.ts index fbb5879c..5671c932 100644 --- a/apps/backend/src/retrieval/retrieval.service.ts +++ b/apps/backend/src/retrieval/retrieval.service.ts @@ -100,6 +100,7 @@ export class RetrievalService { } const providerLabels = buildCheckMetricLabels({ checkType: "retrieval", + network: deal.network, providerId: provider.providerId, providerName: provider.name, providerIsApproved: provider.isApproved,