Skip to content

Commit

Permalink
Make HealthChecks.ResourceUtilization use observable instruments
Browse files Browse the repository at this point in the history
  • Loading branch information
evgenyfedorov2 authored and evgenyfedorov2 committed Jan 16, 2025
1 parent 8f15b0f commit bf28441
Show file tree
Hide file tree
Showing 10 changed files with 549 additions and 148 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
<PropertyGroup>
<EnableConfigurationBindingGenerator>true</EnableConfigurationBindingGenerator>
<InjectSharedDataValidation>true</InjectSharedDataValidation>
<InjectSharedDiagnosticIds>true</InjectSharedDiagnosticIds>
<InjectExperimentalAttributeOnLegacy>true</InjectExperimentalAttributeOnLegacy>
<InjectObsoleteAttributeOnLegacy>true</InjectObsoleteAttributeOnLegacy>
</PropertyGroup>

<PropertyGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
Expand All @@ -13,21 +15,53 @@ namespace Microsoft.Extensions.Diagnostics.HealthChecks;
/// <summary>
/// Represents an <see cref="IHealthCheck"/> that reports health based on the CPU and memory utilization of in-container resources.
/// </summary>
internal sealed class ResourceUtilizationHealthCheck : IHealthCheck
internal sealed class ResourceUtilizationHealthCheck : IHealthCheck, IDisposable
{
private readonly double _multiplier;
private readonly MeterListener? _meterListener;
private readonly ResourceUtilizationHealthCheckOptions _options;
private readonly IResourceMonitor _dataTracker;
private double _cpuUsedPercentage;
private double _memoryUsedPercentage;

/// <summary>
/// Initializes a new instance of the <see cref="ResourceUtilizationHealthCheck"/> class.
/// </summary>
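/// <param name="options">The health check options, including the CPU and memory utilization thresholds.</param>
/// <param name="dataTracker">The resource monitor queried for utilization when observable instruments are not used.</param>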
public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOptions> options,
IResourceMonitor dataTracker)
public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOptions> options, IResourceMonitor dataTracker)
{
    _options = Throw.IfMemberNull(options, options.Value);
    _dataTracker = Throw.IfNull(dataTracker);

#if NETFRAMEWORK
    // Measurements are used as-is on .NET Framework builds.
    _multiplier = 1;
#else
#pragma warning disable S109 // Magic numbers should not be used
    // Due to a bug on Windows (https://github.com/dotnet/extensions/issues/5472) the utilization
    // already arrives in the range [0, 100], so it is left untouched there. On other platforms
    // it arrives in the range [0, 1] and is converted to a percentage.
    _multiplier = OperatingSystem.IsWindows() ? 1 : 100;
#pragma warning restore S109 // Magic numbers should not be used
#endif

    // The listener is only needed when the observable instruments were opted into.
    if (!_options.UseObservableResourceMonitoringInstruments)
    {
        return;
    }

    MeterListener listener = new()
    {
        InstrumentPublished = OnInstrumentPublished
    };

    _meterListener = listener;
    listener.SetMeasurementEventCallback<double>(OnMeasurementRecorded);
    listener.Start();
}

/// <summary>
Expand All @@ -38,19 +72,29 @@ public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOpt
/// <returns>A <see cref="Task{HealthCheckResult}"/> that completes when the health check has finished, yielding the status of the component being checked.</returns>
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
{
var utilization = _dataTracker.GetUtilization(_options.SamplingWindow);
if (_options.UseObservableResourceMonitoringInstruments)
{
_meterListener!.RecordObservableInstruments();
}
else
{
var utilization = _dataTracker.GetUtilization(_options.SamplingWindow);
_cpuUsedPercentage = utilization.CpuUsedPercentage;
_memoryUsedPercentage = utilization.MemoryUsedPercentage;
}

IReadOnlyDictionary<string, object> data = new Dictionary<string, object>
{
{ nameof(utilization.CpuUsedPercentage), utilization.CpuUsedPercentage },
{ nameof(utilization.MemoryUsedPercentage), utilization.MemoryUsedPercentage },
{ "CpuUsedPercentage", _cpuUsedPercentage },
{ "MemoryUsedPercentage", _memoryUsedPercentage },
};

bool cpuUnhealthy = utilization.CpuUsedPercentage > _options.CpuThresholds.UnhealthyUtilizationPercentage;
bool memoryUnhealthy = utilization.MemoryUsedPercentage > _options.MemoryThresholds.UnhealthyUtilizationPercentage;
bool cpuUnhealthy = _cpuUsedPercentage > _options.CpuThresholds.UnhealthyUtilizationPercentage;
bool memoryUnhealthy = _memoryUsedPercentage > _options.MemoryThresholds.UnhealthyUtilizationPercentage;

if (cpuUnhealthy || memoryUnhealthy)
{
string message = string.Empty;
string message;
if (cpuUnhealthy && memoryUnhealthy)
{
message = "CPU and memory usage is above the limit";
Expand All @@ -67,12 +111,12 @@ public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, Canc
return Task.FromResult(HealthCheckResult.Unhealthy(message, default, data));
}

bool cpuDegraded = utilization.CpuUsedPercentage > _options.CpuThresholds.DegradedUtilizationPercentage;
bool memoryDegraded = utilization.MemoryUsedPercentage > _options.MemoryThresholds.DegradedUtilizationPercentage;
bool cpuDegraded = _cpuUsedPercentage > _options.CpuThresholds.DegradedUtilizationPercentage;
bool memoryDegraded = _memoryUsedPercentage > _options.MemoryThresholds.DegradedUtilizationPercentage;

if (cpuDegraded || memoryDegraded)
{
string message = string.Empty;
string message;
if (cpuDegraded && memoryDegraded)
{
message = "CPU and memory usage is close to the limit";
Expand All @@ -91,4 +135,43 @@ public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, Canc

return Task.FromResult(HealthCheckResult.Healthy(default, data));
}

/// <inheritdoc />
public void Dispose() => Dispose(disposing: true);

// Disposes the meter listener, if one was created, when called with disposing set to true.
private void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    _meterListener?.Dispose();
}

// Listener callback: enables measurement events only for instruments whose meter
// is the Resource Monitoring meter; every other instrument is ignored.
private void OnInstrumentPublished(Instrument instrument, MeterListener listener)
{
    const string ResourceMonitoringMeterName = "Microsoft.Extensions.Diagnostics.ResourceMonitoring";

    if (instrument.Meter.Name is not ResourceMonitoringMeterName)
    {
        return;
    }

    listener.EnableMeasurementEvents(instrument);
}

// Measurement callback: stores the latest scaled CPU or memory reading, keyed by instrument name.
// Measurements from instruments with any other name are dropped.
private void OnMeasurementRecorded(
    Instrument instrument, double measurement,
    ReadOnlySpan<KeyValuePair<string, object?>> tags, object? state)
{
    string instrumentName = instrument.Name;

    // The same multiplier is applied to both CPU and memory readings.
    double scaledMeasurement = measurement * _multiplier;

    if (instrumentName is "process.cpu.utilization" or "container.cpu.limit.utilization")
    {
        _cpuUsedPercentage = scaledMeasurement;
    }
    else if (instrumentName is "dotnet.process.memory.virtual.utilization" or "container.memory.limit.utilization")
    {
        _memoryUsedPercentage = scaledMeasurement;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Extensions.Options;
using Microsoft.Shared.Data.Validation;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.Diagnostics.HealthChecks;

Expand All @@ -20,8 +21,7 @@ public class ResourceUtilizationHealthCheckOptions
/// Gets or sets thresholds for CPU utilization.
/// </summary>
/// <remarks>
/// The thresholds are periodically compared against the utilization samples provided by
/// the registered <see cref="IResourceMonitor"/>.
/// The thresholds are periodically compared against the utilization samples provided by the Resource Monitoring library.
/// </remarks>
[ValidateObjectMembers]
public ResourceUsageThresholds CpuThresholds { get; set; } = new ResourceUsageThresholds();
Expand All @@ -30,18 +30,33 @@ public class ResourceUtilizationHealthCheckOptions
/// Gets or sets thresholds for memory utilization.
/// </summary>
/// <remarks>
/// The thresholds are periodically compared against the utilization samples provided by
/// the registered <see cref="IResourceMonitor"/>.
/// The thresholds are periodically compared against the utilization samples provided by the Resource Monitoring library.
/// </remarks>
[ValidateObjectMembers]
public ResourceUsageThresholds MemoryThresholds { get; set; } = new ResourceUsageThresholds();

/// <summary>
/// Gets or sets the time window for used for calculating CPU and memory utilization averages.
/// Gets or sets the time window used for calculating CPU and memory utilization averages.
/// </summary>
/// <value>
/// The default value is 5 seconds.
/// </value>
#pragma warning disable CS0436 // Type conflicts with imported type
[Obsolete(DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiMessage,
DiagnosticId = DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiDiagId,
UrlFormat = DiagnosticIds.UrlFormat)]
#pragma warning restore CS0436 // Type conflicts with imported type
[TimeSpan(MinimumSamplingWindow, int.MaxValue)]
public TimeSpan SamplingWindow { get; set; } = DefaultSamplingWindow;

/// <summary>
/// Gets or sets a value indicating whether the observable instruments will be used for getting CPU and Memory usage
/// as opposed to the default <see cref="Microsoft.Extensions.Diagnostics.ResourceMonitoring.IResourceMonitor"/> API which is obsolete.
/// </summary>
/// <value>
/// <see langword="true" /> if the observable instruments are used. The default is <see langword="false" />.
/// In the future the default will be <see langword="true" />.
/// </value>
[Experimental(diagnosticId: DiagnosticIds.Experiments.HealthChecks, UrlFormat = DiagnosticIds.UrlFormat)]
public bool UseObservableResourceMonitoringInstruments { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,6 @@
<ItemGroup>
<InternalsVisibleToDynamicProxyGenAssembly2 Include="*" />
<InternalsVisibleToTest Include="$(AssemblyName).Tests" />
<InternalsVisibleToTest Include="Microsoft.Extensions.Diagnostics.HealthChecks.ResourceUtilization.Tests" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Collections.Generic;

namespace Microsoft.Extensions.Diagnostics.HealthChecks.Test;

/// <summary>
/// Supplies rows of health check test data. Each row holds, in order: a <see cref="HealthStatus"/>,
/// a <see cref="double"/>, two <see cref="ulong"/> values, two <see cref="ResourceUsageThresholds"/>
/// instances and a message string.
/// </summary>
/// <remarks>
/// NOTE(review): the columns presumably map to expected status, CPU reading, used memory, total memory,
/// CPU thresholds, memory thresholds and expected description - confirm against the consuming tests.
/// </remarks>
public class HealthCheckTestData : IEnumerable<object[]>
{
    private const string NoMessage = "";
    private const string BothCloseToLimit = "CPU and memory usage is close to the limit";
    private const string BothAboveLimit = "CPU and memory usage is above the limit";
    private const string CpuCloseToLimit = "CPU usage is close to the limit";
    private const string CpuAboveLimit = "CPU usage is above the limit";
    private const string MemoryCloseToLimit = "Memory usage is close to the limit";
    private const string MemoryAboveLimit = "Memory usage is above the limit";

    /// <summary>
    /// Gets the test rows. A new list with new threshold instances is built on every access.
    /// </summary>
    public static IEnumerable<object[]> Data =>
        new List<object[]>
        {
            Row(HealthStatus.Healthy, 0.1, 0UL, 1000UL, new ResourceUsageThresholds(), new ResourceUsageThresholds(), NoMessage),
            Row(HealthStatus.Healthy, 0.2, 0UL, 1000UL, Thresholds(0.2, 0.2), Thresholds(0.2, 0.2), NoMessage),
            Row(HealthStatus.Healthy, 0.2, 2UL, 1000UL, Thresholds(0.2, 0.2), Thresholds(0.2, 0.2), NoMessage),
            Row(HealthStatus.Degraded, 0.4, 3UL, 1000UL, Thresholds(0.2, 0.4), Thresholds(0.2, 0.4), BothCloseToLimit),
            Row(HealthStatus.Unhealthy, 0.5, 5UL, 1000UL, Thresholds(0.2, 0.4), Thresholds(0.2, 0.4), BothAboveLimit),

            // Degraded threshold deliberately above the unhealthy one.
            Row(HealthStatus.Unhealthy, 0.5, 5UL, 1000UL, Thresholds(0.4, 0.2), Thresholds(0.4, 0.2), BothAboveLimit),

            // Only the degraded threshold is set explicitly.
            Row(
                HealthStatus.Degraded,
                0.3,
                3UL,
                1000UL,
                new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2 },
                new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2 },
                BothCloseToLimit),

            // Only the unhealthy threshold is set explicitly.
            Row(
                HealthStatus.Unhealthy,
                0.5,
                5UL,
                1000UL,
                new ResourceUsageThresholds { UnhealthyUtilizationPercentage = 0.4 },
                new ResourceUsageThresholds { UnhealthyUtilizationPercentage = 0.4 },
                BothAboveLimit),

            Row(HealthStatus.Degraded, 0.3, 3UL, 1000UL, Thresholds(0.2, 0.4), Thresholds(0.9, 0.9), CpuCloseToLimit),
            Row(HealthStatus.Degraded, 0.1, 3UL, 1000UL, Thresholds(0.9, 0.9), Thresholds(0.2, 0.4), MemoryCloseToLimit),
            Row(HealthStatus.Unhealthy, 0.5, 5UL, 1000UL, Thresholds(0.2, 0.4), Thresholds(0.9, 0.9), CpuAboveLimit),
            Row(HealthStatus.Unhealthy, 0.1, 5UL, 1000UL, Thresholds(0.9, 0.9), Thresholds(0.2, 0.4), MemoryAboveLimit),
        };

    /// <inheritdoc />
    public IEnumerator<object[]> GetEnumerator() => Data.GetEnumerator();

    /// <inheritdoc />
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    // Packs one row of values into the object[] shape exposed by Data, preserving column order.
    private static object[] Row(
        HealthStatus status,
        double firstValue,
        ulong secondValue,
        ulong thirdValue,
        ResourceUsageThresholds firstThresholds,
        ResourceUsageThresholds secondThresholds,
        string message)
        => new object[] { status, firstValue, secondValue, thirdValue, firstThresholds, secondThresholds, message };

    // Creates thresholds with both percentages set, degraded first and unhealthy second.
    private static ResourceUsageThresholds Thresholds(double degraded, double unhealthy)
        => new ResourceUsageThresholds { DegradedUtilizationPercentage = degraded, UnhealthyUtilizationPercentage = unhealthy };
}
Loading

0 comments on commit bf28441

Please sign in to comment.