2026-02-27 14:17:13 +08:00
|
|
|
|
using AutoNotificatPhone.Controllers;
|
2025-11-20 09:56:11 +08:00
|
|
|
|
using Common;
|
2026-02-27 14:17:13 +08:00
|
|
|
|
using Microsoft.Extensions.Configuration;
|
2025-11-20 09:56:11 +08:00
|
|
|
|
using NLog;
|
2026-02-09 19:31:55 +08:00
|
|
|
|
using Npgsql;
|
2026-02-27 14:17:13 +08:00
|
|
|
|
using System.Diagnostics;
|
2025-11-20 09:56:11 +08:00
|
|
|
|
|
|
|
|
|
|
namespace AutoNotificatPhone.Models
|
|
|
|
|
|
{
|
2026-02-27 14:17:13 +08:00
|
|
|
|
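/// <summary>
/// Scheduled background service that:
/// 1) runs an inspection cycle at a fixed second of every minute;
/// 2) sends the hourly / daily scheduled notifications;
/// 3) raises alerts based on Redis metrics;
/// 4) raises alerts based on the PostgreSQL heartbeat check.
/// </summary>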
|
2025-11-20 09:56:11 +08:00
|
|
|
|
public class TimerClass : BackgroundService
|
|
|
|
|
|
{
|
2026-02-27 14:17:13 +08:00
|
|
|
|
// Shared NLog logger for this class.
private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

// Phone numbers that receive alert notifications.
// NOTE(review): these are hard-coded in source; consider moving them to configuration.
private const string Mobile1 = "13509214696";
private const string Mobile2 = "16620970520";

// Hours of the day (Beijing time) at which the daily task runs instead of the hourly SMS.
private static readonly HashSet<int> DailyTaskHours = new() { 10, 15, 22 };

// Second within each minute at which the inspection cycle is triggered.
private const int RunSecond = 30;

// Seconds to wait before retrying after the main loop hits an unexpected exception.
private const int RetryDelaySeconds = 10;

// Default lifetime (seconds) of SMS and call tasks.
private const int SmsDeadlineSeconds = 1800;
private const int CallDeadlineSeconds = 900;

// Extended lifetime (seconds) of SMS and call tasks.
private const int ExtendedSmsDeadlineSeconds = 3600;
private const int ExtendedCallDeadlineSeconds = 1800;

// Kafka heartbeat staleness threshold (minutes).
private const int KafkaStaleMinutes = 5;

// Database backup window (Beijing time, 03:00-04:00); the heartbeat check is skipped during it.
private const int KafkaCheckSkipStartHour = 3;
private const int KafkaCheckSkipEndHour = 4;

// Number of database failures that must accumulate before one alert is sent (avoids alert storms).
private const int KafkaDbAlertTriggerCount = 8;

// Received-package values below this number are treated as "low".
private const int RecvPackageLowThreshold = 70000;

// Hour slots whose notification task has already run; prevents running the same slot twice.
private readonly Dictionary<DateTime, bool> _executedTasks = new Dictionary<DateTime, bool>();

// API controller instance reused to submit SMS / call tasks.
private readonly CallAndMsgController _callAndMsgController = new CallAndMsgController();

// Application configuration; the "Postgres" section is read from it.
private readonly IConfiguration _configuration;

// Counter of Kafka database failures since the last connection alert.
private int _kafkaDbConnectionAlertCount;
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Creates the service and stores the injected application configuration.
/// </summary>
/// <param name="configuration">Application configuration; its "Postgres" section is read when the database connection string is built.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="configuration"/> is null.</exception>
public TimerClass(IConfiguration configuration)
{
    // Fail fast here instead of with a NullReferenceException later inside the timer loop.
    _configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
}
|
2025-11-20 09:56:11 +08:00
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Main loop of the background service. Each iteration waits for the next trigger
/// point, then runs the phone check, the hourly/daily notification task and all
/// metric checks in sequence.
/// </summary>
/// <param name="cancellationToken">Token signalled when the host stops the service.</param>
protected override async Task ExecuteAsync(CancellationToken cancellationToken)
{
    while (!cancellationToken.IsCancellationRequested)
    {
        try
        {
            // Sleep until the next fixed trigger point (second RunSecond of a minute).
            await DelayUntilNextRunAsync(cancellationToken);

            // Check whether the phone process is running and log the result.
            LogPhoneStatus(CheckPhoneIsOnline());

            // Hourly / daily notification task.
            RunHourlyNotificationTask();

            // System checks.
            CheckCpuThreshold();
            CheckRcuOnline();
            CheckTotalSendPackage();
            CheckTotalRecvPackage();
            await CheckKafkaHeartbeatAsync();
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Only a cancellation caused by service shutdown ends the loop. A cancellation
            // raised for any other reason (for example an operation timeout) falls through
            // to the generic handler below so monitoring keeps running.
            Logger.Error("任务被取消");
            break;
        }
        catch (Exception ex)
        {
            // Catch-all for the loop: log (with the exception attached) and retry shortly.
            Logger.Error(ex, $"主循环发生错误: {ex.Message}");

            try
            {
                await Task.Delay(TimeSpan.FromSeconds(RetryDelaySeconds), cancellationToken);
            }
            catch (OperationCanceledException)
            {
                // Shutdown was requested while waiting to retry; leave the loop cleanly
                // instead of letting the exception escape ExecuteAsync.
                Logger.Error("任务被取消");
                break;
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Asynchronously waits until the next trigger point computed by
/// <see cref="CalculateNextRunTime"/>.
/// </summary>
/// <param name="cancellationToken">Token that aborts the wait.</param>
private async Task DelayUntilNextRunAsync(CancellationToken cancellationToken)
{
    var utcNow = DateTime.UtcNow;

    // Remaining time between now and the next trigger point.
    var waitTime = CalculateNextRunTime(utcNow) - utcNow;

    await Task.Delay(waitTime, cancellationToken);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Computes the next trigger point, i.e. the next occurrence of second
/// <c>RunSecond</c> within a minute.
/// </summary>
/// <param name="now">Current time (the caller passes UTC).</param>
/// <returns>The trigger point in the current minute, or in the next minute when the current one has already been reached.</returns>
private static DateTime CalculateNextRunTime(DateTime now)
{
    // Trigger point inside the current minute (sub-second part is dropped).
    var triggerThisMinute = new DateTime(now.Year, now.Month, now.Day, now.Hour, now.Minute, RunSecond);

    if (now.Second < RunSecond)
    {
        return triggerThisMinute;
    }

    // The trigger second has been reached or passed: move to the following minute.
    return triggerThisMinute.AddMinutes(1);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Writes the phone online/offline status to the log.
/// </summary>
/// <param name="isOnline">Whether the phone process was found running.</param>
private static void LogPhoneStatus(bool isOnline)
{
    if (isOnline)
    {
        Logger.Error("电话机在线,开始判断!+++++str+++++");
        return;
    }

    Logger.Error("电话机不在线,下面内容可能不会执行!+++++err+++++");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Determines whether the phone program is running by looking for a local
/// process named "Telephone".
/// </summary>
/// <returns><c>true</c> when at least one such process exists; <c>false</c> otherwise or when the lookup fails.</returns>
private static bool CheckPhoneIsOnline()
{
    try
    {
        // The process name "Telephone" is the agreed name of the phone program.
        var processes = Process.GetProcessesByName("Telephone");
        try
        {
            return processes.Length > 0;
        }
        finally
        {
            // Each Process object holds OS resources; this method runs every minute,
            // so release them instead of waiting for finalization.
            foreach (var process in processes)
            {
                process.Dispose();
            }
        }
    }
    catch (Exception ex)
    {
        Logger.Error($"电话机进程检查失败: {ex.Message}");
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Dispatches the on-the-hour notification task (Beijing time):
/// - returns immediately when the current minute is not 0;
/// - runs at most once per hour slot;
/// - runs the daily task at the hours listed in <c>DailyTaskHours</c>, the hourly SMS otherwise.
/// </summary>
private void RunHourlyNotificationTask()
{
    // Beijing time is derived as UTC+8.
    var nowBeijing = DateTime.UtcNow.AddHours(8);

    // Only the first minute of an hour triggers the task.
    if (nowBeijing.Minute != 0)
    {
        return;
    }

    // Key identifying the current hour slot (date + hour, minutes and seconds zeroed).
    var slot = nowBeijing.Date.AddHours(nowBeijing.Hour);
    if (_executedTasks.ContainsKey(slot))
    {
        // This slot has already been handled.
        return;
    }

    Logger.Error($"准备执行整点短信任务 - 时间点: {slot:yyyy-MM-dd HH:mm}");

    var isDailySlot = DailyTaskHours.Contains(nowBeijing.Hour);
    if (!isDailySlot)
    {
        SendHourlySms(nowBeijing);
    }
    else
    {
        ExecuteDailyTask(nowBeijing);
    }

    // Remember the slot, then drop records from earlier days.
    _executedTasks[slot] = true;
    CleanupOldTasks(nowBeijing);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Removes hour-slot records whose date is earlier than the date of
/// <paramref name="currentTime"/>.
/// </summary>
/// <param name="currentTime">Current time (the caller passes Beijing time).</param>
private void CleanupOldTasks(DateTime currentTime)
{
    var today = currentTime.Date;

    // Collect first: the dictionary must not be modified while its keys are enumerated.
    var staleKeys = new List<DateTime>();
    foreach (var executedAt in _executedTasks.Keys)
    {
        if (executedAt.Date < today)
        {
            staleKeys.Add(executedAt);
        }
    }

    staleKeys.ForEach(staleKey => _executedTasks.Remove(staleKey));
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Sends the hourly status SMS to <c>Mobile1</c> and logs a failure when the
/// submission is rejected or throws.
/// </summary>
/// <param name="beijingTime">Current Beijing time, used for the text of the message.</param>
private void SendHourlySms(DateTime beijingTime)
{
    try
    {
        // Human-readable time shown in the message.
        var currentTimestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
        var dateTimeStr = $"{beijingTime.Month}月{beijingTime.Day}日{beijingTime.Hour}点";
        var smsContent = $"[BLV运维提示] 整点系统状态报告。当前时间:{dateTimeStr}";

        // A single SMS (type "2") addressed to Mobile1 only.
        var request = CreateSmsRequest(
            type: "2",
            deadline: currentTimestamp + SmsDeadlineSeconds,
            phone: Mobile1,
            caller: "整点报告",
            content: smsContent);

        // Submit the task and, like the other senders in this class, report a rejected
        // submission instead of silently ignoring the result.
        var result = _callAndMsgController.SendToPhone(request);
        if (!result.isok)
        {
            Logger.Error($"发送整点短信失败:{result.message}");
        }
    }
    catch (Exception ex)
    {
        Logger.Error($"发送整点短信时出错:{ex.Message}");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Runs the daily scheduled notification: an SMS to each of the two configured
/// numbers plus one call to <c>Mobile1</c>. Failures are logged, never thrown.
/// </summary>
/// <param name="beijingTime">Current Beijing time, used for the text of the messages.</param>
private void ExecuteDailyTask(DateTime beijingTime)
{
    try
    {
        const string caller = "每日定时通知";

        var issuedAt = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
        var timeLabel = $"{beijingTime.Month}月{beijingTime.Day}日{beijingTime.Hour}点";
        var smsText = $"[BLV运维提示] 每日定时通知。当前时间为:{timeLabel}";
        var callText = $"BLV运维提示 每日定时通知 当前时间为 {timeLabel}";

        var smsExpiry = issuedAt + SmsDeadlineSeconds;
        var callExpiry = issuedAt + CallDeadlineSeconds;

        // Two SMS requests (type "2") and one call request (type "1").
        var firstSms = CreateSmsRequest("2", smsExpiry, Mobile1, caller, smsText);
        var secondSms = CreateSmsRequest("2", smsExpiry, Mobile2, caller, smsText);
        var call = CreateSmsRequest("1", callExpiry, Mobile1, caller, callText);

        // All three are submitted before any result is inspected.
        var firstSmsOutcome = _callAndMsgController.SendToPhone(firstSms);
        var secondSmsOutcome = _callAndMsgController.SendToPhone(secondSms);
        var callOutcome = _callAndMsgController.SendToPhone(call);

        var allDelivered = firstSmsOutcome.isok && secondSmsOutcome.isok && callOutcome.isok;
        if (allDelivered)
        {
            return;
        }

        Logger.Error($"发送每日定时通知失败:短信1={firstSmsOutcome.message} 短信2={secondSmsOutcome.message} 电话={callOutcome.message}");
    }
    catch (Exception ex)
    {
        Logger.Error($"执行每日任务时出错:{ex.Message}");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// CPU threshold check:
/// 1) first decides whether the monitoring program has stopped reporting;
/// 2) otherwise checks whether the CPU averages exceed the alert rule.
/// Any exception is logged and swallowed.
/// </summary>
private void CheckCpuThreshold()
{
    try
    {
        // Last report time written to Redis by the monitoring program.
        var lastReport = CSRedisCacheHelper.redis1.Get<string>("UDPPackage_DetectTime");

        // NOTE(review): the parsed value is compared against DateTime.UtcNow, which
        // presumably expects the stored timestamp to be UTC - confirm with the writer.
        if (!string.IsNullOrEmpty(lastReport) && DateTime.TryParse(lastReport, out var detectTime))
        {
            var silence = DateTime.UtcNow - detectTime;
            if (silence.TotalMinutes > 10)
            {
                // No update for more than 10 minutes: raise the "monitor unavailable" alert.
                ExecuteMonitorUnavailableAlert(detectTime);
                return;
            }
        }

        // Fetch and parse the CPU series.
        var maxSeries = ParseCsvToIntList(CSRedisCacheHelper.redis1.Get<string>("UDPPackage_CPUMax"));
        var avgSeries = ParseCsvToIntList(CSRedisCacheHelper.redis1.Get<string>("UDPPackage_CPUAvg"));

        // Rule: alert when at least 6 average-CPU points are >= 80.
        if (!CheckThreshold(avgSeries, threshold: 80, requiredCount: 6))
        {
            return;
        }

        var minSeries = ParseCsvToIntList(CSRedisCacheHelper.redis1.Get<string>("UDPPackage_CPUMin"));
        ExecuteCpuAlert(maxSeries, minSeries, avgSeries);
    }
    catch (Exception ex)
    {
        Logger.Error($"CPU阈值检查错误: {ex.Message}");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checks the RCU online count stored under the Redis key "RCUOnLine", using
/// 8 baseline samples and a threshold ratio of 0.8.
/// </summary>
private void CheckRcuOnline() =>
    CheckRedisValue("RCUOnLine", 8, 0.8, ExecuteRcuOnlineAlert, "RCU主机的在线数量");
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checks the total number of packages sent (Redis key
/// "UDPPackage_TotalSendPackage"), using 8 baseline samples and a threshold
/// ratio of 0.6.
/// </summary>
private void CheckTotalSendPackage() =>
    CheckRedisValue("UDPPackage_TotalSendPackage", 8, 0.6, ExecuteTotalSendPackageAlert, "RCU主机的通讯数量");
|
2026-02-27 14:17:13 +08:00
|
|
|
|
|
2025-11-20 09:56:11 +08:00
|
|
|
|
/// <summary>
/// Checks the total number of packages received. On top of the generic
/// baseline rule it applies a fast rule: when the last three samples are all
/// below <c>RecvPackageLowThreshold</c> the alert is raised immediately.
/// Any exception is logged and swallowed.
/// </summary>
private void CheckTotalRecvPackage()
{
    try
    {
        // Received-package series stored in Redis as a comma-separated string.
        var raw = CSRedisCacheHelper.redis1.Get<string>("UDPPackage_TotalRecvPackage");
        if (string.IsNullOrEmpty(raw))
        {
            return;
        }

        var series = ParseCsvToIntList(raw);
        if (series.Count < 10)
        {
            // Not enough samples to evaluate either rule.
            return;
        }

        // Fast rule: the three most recent samples are all low.
        var lastThree = series.GetRange(series.Count - 3, 3);
        if (lastThree.All(sample => sample < RecvPackageLowThreshold))
        {
            ExecuteTotalRecvPackageAlert(lastThree);
            return;
        }

        // Otherwise fall back to the generic baseline rule (which reads the key again).
        CheckRedisValue("UDPPackage_TotalRecvPackage", 8, 0.75, ExecuteTotalRecvPackageAlert, "RCU主机的通讯数量");
    }
    catch (Exception ex)
    {
        Logger.Error($"总接收包数量检查错误: {ex.Message}");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Generic check for a Redis-stored, comma-separated series:
/// - the average of the first <paramref name="baselineCount"/> samples is the baseline;
/// - the threshold is baseline * <paramref name="thresholdRatio"/>;
/// - the alert fires when the two samples following the baseline are both below the threshold.
/// Any exception is logged with <paramref name="logPrefix"/> and swallowed.
/// </summary>
/// <param name="redisKey">Redis key holding the series.</param>
/// <param name="baselineCount">Number of leading samples used for the baseline.</param>
/// <param name="thresholdRatio">Fraction of the baseline used as the threshold.</param>
/// <param name="alertAction">Callback invoked with the whole series when the rule fires.</param>
/// <param name="logPrefix">Prefix for the error log message.</param>
private void CheckRedisValue(string redisKey, int baselineCount, double thresholdRatio, Action<List<int>> alertAction, string logPrefix)
{
    try
    {
        // Read the CSV string from Redis; nothing to check when it is missing.
        var valueString = CSRedisCacheHelper.redis1.Get<string>(redisKey);
        if (string.IsNullOrEmpty(valueString))
        {
            return;
        }

        var values = ParseCsvToIntList(valueString);

        // The rule reads indices 0 .. baselineCount + 1, so exactly that many samples
        // are required. Deriving the minimum from baselineCount (rather than a fixed 10)
        // keeps the guard correct for any baseline size; it equals 10 for a baseline of 8.
        var requiredCount = baselineCount + 2;
        if (values.Count < requiredCount)
        {
            return;
        }

        // Baseline average and the resulting threshold.
        var average = values.Take(baselineCount).Average();
        var threshold = average * thresholdRatio;

        // Both samples right after the baseline must be below the threshold.
        if (values[baselineCount] < threshold && values[baselineCount + 1] < threshold)
        {
            alertAction(values);
        }
    }
    catch (Exception ex)
    {
        Logger.Error($"{logPrefix}检查错误: {ex.Message}");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Raises the "RCU online count is low" alert.
/// </summary>
/// <param name="values">Series that triggered the alert; appended to the SMS text.</param>
private void ExecuteRcuOnlineAlert(List<int> values)
{
    var joinedValues = string.Join(",", values);

    SendAlert(
        $"[BLV运维提示] RCU主机在线数量低于正常值,请立即检查。数据:{joinedValues}",
        "BLV运维提示 RCU主机在线数量低于正常值 请立即检查",
        "RCU-在线数量警报");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Raises the "RCU sent-package count is low" alert.
/// </summary>
/// <param name="values">Series that triggered the alert; appended to the SMS text.</param>
private void ExecuteTotalSendPackageAlert(List<int> values)
{
    var joinedValues = string.Join(",", values);

    SendAlert(
        $"[BLV运维提示] RCU发送数量低于预期值,请立即检查。数据:{joinedValues}",
        "BLV运维提示 RCU发送数量低于预期值 请立即检查",
        "RCU-通讯数量警报");
}
|
2026-02-27 14:17:13 +08:00
|
|
|
|
|
2025-11-20 09:56:11 +08:00
|
|
|
|
/// <summary>
/// Raises the "RCU received-package count is low" alert.
/// </summary>
/// <param name="values">Samples that triggered the alert; appended to the SMS text.</param>
private void ExecuteTotalRecvPackageAlert(List<int> values)
{
    var joinedValues = string.Join(",", values);

    SendAlert(
        $"[BLV运维提示] RCU接收数量低于预期值,请立即检查。数据:{joinedValues}",
        "BLV运维提示 RCU接收数量低于预期值 请立即检查",
        "RCU-通讯数量警报");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Raises the CPU usage alert; the SMS text carries the average, maximum and
/// minimum series.
/// </summary>
/// <param name="cpuMax">Maximum CPU series.</param>
/// <param name="cpuMin">Minimum CPU series.</param>
/// <param name="cpuAvg">Average CPU series.</param>
private void ExecuteCpuAlert(List<int> cpuMax, List<int> cpuMin, List<int> cpuAvg)
{
    // Renders one series as comma-separated text.
    static string Csv(List<int> series) => string.Join(",", series);

    var details = $"AVG:{Csv(cpuAvg)},MAX:{Csv(cpuMax)},MIN:{Csv(cpuMin)}";

    SendAlert(
        $"[BLV运维提示] RCU服务器的CPU使用率告警。{details}",
        "BLV运维提示 RCU服务器的CPU使用率告警 请立即检查",
        "RCU-CPU警报");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Raises the "monitoring program unavailable" alert with the extended task lifetime.
/// </summary>
/// <param name="detectTime">Last detection time; not used by the current implementation.</param>
private void ExecuteMonitorUnavailableAlert(DateTime detectTime)
{
    const string smsText = "[BLV运维提示] RCU服务器的监控程序无法访问,请立即检查。";
    const string callText = "BLV运维提示 RCU服务器的监控程序无法访问 请立即检查";

    SendAlert(smsText, callText, "RCU-监控程序警报", true);
}
|
|
|
|
|
|
|
2026-02-09 19:31:55 +08:00
|
|
|
|
/// <summary>
/// Queries the PostgreSQL heartbeat table for the newest write timestamp and
/// alerts when it is older than <c>KafkaStaleMinutes</c>. Missing configuration,
/// an empty or unparsable result and any exception are all routed to the
/// database-connection alert path.
/// </summary>
/// <returns>A task that completes when the check has finished.</returns>
private async Task CheckKafkaHeartbeatAsync()
{
    try
    {
        // The database is backed up during the skip window (Beijing time);
        // the check is suppressed there to avoid false alarms.
        var hourInBeijing = DateTime.UtcNow.AddHours(8).Hour;
        var insideBackupWindow = hourInBeijing >= KafkaCheckSkipStartHour && hourInBeijing < KafkaCheckSkipEndHour;
        if (insideBackupWindow)
        {
            return;
        }

        // Build the connection string from configuration.
        var connectionString = BuildPostgresConnectionString();
        if (string.IsNullOrWhiteSpace(connectionString))
        {
            Logger.Error("Postgres配置缺失,无法检查Kafka入库心跳");
            // Missing configuration is handled like a connection failure.
            ExecuteKafkaDbConnectionAlert();
            return;
        }

        // Newest write_ts_ms among the 3000 most recent events (ordered by ts_ms).
        const string latestWriteSql = @"SELECT write_ts_ms
            FROM (
                SELECT write_ts_ms
                FROM heartbeat.heartbeat_events_g5
                ORDER BY ts_ms DESC
                LIMIT 3000
            ) AS recent_events
            ORDER BY write_ts_ms DESC
            LIMIT 1;";

        await using var connection = new NpgsqlConnection(connectionString);
        await connection.OpenAsync();

        await using var command = new NpgsqlCommand(latestWriteSql, connection);
        var scalar = await command.ExecuteScalarAsync();

        // An empty result is handled on the database-failure path.
        if (scalar is null or DBNull)
        {
            Logger.Error("Kafka入库心跳ts_ms查询结果为空");
            ExecuteKafkaDbConnectionAlert();
            return;
        }

        // The timestamp is expected to be a millisecond value.
        if (!long.TryParse(scalar.ToString(), out var lastWriteMs))
        {
            Logger.Error("Kafka入库心跳ts_ms解析失败");
            ExecuteKafkaDbConnectionAlert();
            return;
        }

        // Stale when "now - newest write" exceeds the configured number of minutes.
        // NOTE(review): the failure counter used by ExecuteKafkaDbConnectionAlert is not
        // reset on this success path, so failures accumulate across healthy periods -
        // confirm that this is intended.
        var staleAfterMs = TimeSpan.FromMinutes(KafkaStaleMinutes).TotalMilliseconds;
        var ageMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() - lastWriteMs;
        if (ageMs > staleAfterMs)
        {
            Logger.Error($"Kafka入库心跳超过{KafkaStaleMinutes}分钟未更新");
            ExecuteKafkaInactiveAlert();
        }
    }
    catch (Exception ex)
    {
        Logger.Error($"Kafka入库心跳检查错误: {ex.Message}");
        ExecuteKafkaDbConnectionAlert();
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Reads the "Postgres" configuration section and builds an Npgsql connection string.
/// </summary>
/// <returns>The connection string, or <c>null</c> when a required setting is missing or the port is not an integer.</returns>
private string? BuildPostgresConnectionString()
{
    // All settings live under the "Postgres" section.
    var postgres = _configuration.GetSection("Postgres");

    var host = postgres["Host"];
    var portText = postgres["Port"];
    var database = postgres["Database"];
    var user = postgres["User"];
    var password = postgres["Password"];

    // Host, Port, Database, User and Password are mandatory.
    if (string.IsNullOrWhiteSpace(host)
        || string.IsNullOrWhiteSpace(portText)
        || string.IsNullOrWhiteSpace(database)
        || string.IsNullOrWhiteSpace(user)
        || string.IsNullOrWhiteSpace(password))
    {
        return null;
    }

    // The port must be a valid integer.
    if (!int.TryParse(portText, out var port))
    {
        return null;
    }

    var builder = new NpgsqlConnectionStringBuilder();
    builder.Host = host;
    builder.Port = port;
    builder.Database = database;
    builder.Username = user;
    builder.Password = password;

    // Optional: maximum pool size, applied only when it is a positive integer.
    if (int.TryParse(postgres["MaxConnections"], out var poolSize) && poolSize > 0)
    {
        builder.MaxPoolSize = poolSize;
    }

    // Optional: idle lifetime, configured in milliseconds and converted to whole
    // seconds with a minimum of 1.
    if (int.TryParse(postgres["IdleTimeoutMs"], out var idleMs) && idleMs > 0)
    {
        builder.ConnectionIdleLifetime = Math.Max(1, idleMs / 1000);
    }

    return builder.ConnectionString;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Raises the "Kafka ingestion stalled" alert with the extended task lifetime.
/// The number of minutes in the text comes from <c>KafkaStaleMinutes</c>, so the
/// message always matches the threshold that triggered it.
/// </summary>
private void ExecuteKafkaInactiveAlert()
{
    SendAlert(
        smsContent: $"[BLV运维提示] BLS数据库{KafkaStaleMinutes}分钟内入库数据为0。",
        callContent: $"BLV运维提示 BLS数据库{KafkaStaleMinutes}分钟内入库数据为0",
        alertType: "BLS-数据库入库警报",
        extendedDeadline: true);
}
|
|
|
|
|
|
|
2026-02-10 09:07:46 +08:00
|
|
|
|
/// <summary>
/// Records one Kafka database failure and sends the connection alert once
/// <c>KafkaDbAlertTriggerCount</c> failures have accumulated (throttling).
/// </summary>
private void ExecuteKafkaDbConnectionAlert()
{
    // Every failure increments the counter; nothing is sent below the threshold.
    _kafkaDbConnectionAlertCount++;
    if (_kafkaDbConnectionAlertCount < KafkaDbAlertTriggerCount)
    {
        return;
    }

    // Reset before sending: if SendAlert throws, the caller's catch block invokes this
    // method again, and a counter still at the threshold would immediately retry the
    // failing send instead of starting a new count.
    _kafkaDbConnectionAlertCount = 0;

    SendAlert(
        smsContent: "[BLV运维提示] 数据库连接失败!",
        // Call text follows the format of the other call texts in this class (no "[...]" brackets).
        callContent: "BLV运维提示 数据库连接失败",
        alertType: "BLS-数据库连接警报",
        extendedDeadline: true);
}
|
|
|
|
|
|
|
2025-11-20 09:56:11 +08:00
|
|
|
|
/// <summary>
/// Parses a comma-separated string into a list of integers. Items that are not
/// valid integers become 0.
/// </summary>
/// <param name="valueString">Comma-separated text; may be null or empty.</param>
/// <returns>One integer per item, or an empty list for null/empty input.</returns>
private static List<int> ParseCsvToIntList(string? valueString)
{
    var numbers = new List<int>();
    if (string.IsNullOrEmpty(valueString))
    {
        return numbers;
    }

    foreach (var item in valueString.Split(','))
    {
        // Machine-written data: parse with the invariant culture so the result does not
        // depend on the server's regional settings. Surrounding whitespace is allowed.
        var parsed = int.TryParse(
            item,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var number);

        numbers.Add(parsed ? number : 0);
    }

    return numbers;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Tells whether at least <paramref name="requiredCount"/> values are greater
/// than or equal to <paramref name="threshold"/>.
/// </summary>
/// <param name="values">Values to inspect.</param>
/// <param name="threshold">Inclusive lower bound a value must reach to count.</param>
/// <param name="requiredCount">Minimum number of values that must reach the threshold.</param>
/// <returns><c>true</c> when enough values reach the threshold.</returns>
private static bool CheckThreshold(List<int> values, int threshold, int requiredCount)
{
    var hits = 0;
    foreach (var value in values)
    {
        if (value >= threshold)
        {
            hits++;
        }
    }

    // A list shorter than requiredCount can never produce enough hits,
    // so no separate length check is needed.
    return hits >= requiredCount;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Single entry point for alerts: sends an SMS to both configured numbers and
/// places one call to <c>Mobile1</c>, then logs an error if any of the three
/// submissions was rejected.
/// </summary>
/// <param name="smsContent">Text of the SMS.</param>
/// <param name="callContent">Text announced by the call.</param>
/// <param name="alertType">Alert type; used as the caller name and in the log message.</param>
/// <param name="extendedDeadline">When <c>true</c>, the extended task lifetimes are used.</param>
private void SendAlert(string smsContent, string callContent, string alertType, bool extendedDeadline = false)
{
    // Task lifetimes depend on whether the extended deadline was requested.
    int smsLifetime;
    int callLifetime;
    if (extendedDeadline)
    {
        smsLifetime = ExtendedSmsDeadlineSeconds;
        callLifetime = ExtendedCallDeadlineSeconds;
    }
    else
    {
        smsLifetime = SmsDeadlineSeconds;
        callLifetime = CallDeadlineSeconds;
    }

    var issuedAt = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
    var smsExpiry = issuedAt + smsLifetime;
    var callExpiry = issuedAt + callLifetime;

    // Two SMS requests (Mobile1 / Mobile2, type "2") and one call request (Mobile1, type "1").
    var firstSms = CreateSmsRequest("2", smsExpiry, Mobile1, alertType, smsContent);
    var secondSms = CreateSmsRequest("2", smsExpiry, Mobile2, alertType, smsContent);
    var call = CreateSmsRequest("1", callExpiry, Mobile1, alertType, callContent);

    // Submit all three before inspecting any result.
    var firstSmsOutcome = _callAndMsgController.SendToPhone(firstSms);
    var secondSmsOutcome = _callAndMsgController.SendToPhone(secondSms);
    var callOutcome = _callAndMsgController.SendToPhone(call);

    var everythingDelivered = firstSmsOutcome.isok && secondSmsOutcome.isok && callOutcome.isok;
    if (everythingDelivered)
    {
        return;
    }

    // At least one submission failed: log all three result messages.
    Logger.Error($"发送{alertType}通知失败: 短信1={firstSmsOutcome.message} 短信2={secondSmsOutcome.message} 电话={callOutcome.message}");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds the request object used for both SMS and call tasks.
/// </summary>
/// <param name="type">Task type: "1" = call, "2" = SMS.</param>
/// <param name="deadline">Expiry time as Unix seconds.</param>
/// <param name="phone">Destination phone number.</param>
/// <param name="caller">Task identifier / caller name.</param>
/// <param name="content">Message or announcement text.</param>
/// <returns>A populated <see cref="SmsRequest"/> whose starting point is the current UTC time in Unix seconds.</returns>
private static SmsRequest CreateSmsRequest(string type, long deadline, string phone, string caller, string content)
{
    // Creation time of the task.
    var createdAt = DateTimeOffset.UtcNow.ToUnixTimeSeconds();

    var request = new SmsRequest
    {
        Type = type,
        DeadLine = deadline,
        StartingPoint = createdAt,
        PhoneNumber = phone,
        CallerName = caller,
        Content = content
    };

    return request;
}
|
|
|
|
|
|
}
|
2026-02-27 14:17:13 +08:00
|
|
|
|
}
|