From 67250d6302f42bd9ccb8ac83350b1bbcc71b4086 Mon Sep 17 00:00:00 2001 From: Nelson Chan <3271800+chakflying@users.noreply.github.com> Date: Fri, 24 Nov 2023 18:11:36 +0800 Subject: [PATCH] Feat: Retries persistence (#3814) * Feat: Retries persistence * Fix: Set duration for first beat of push monitor * Feat: Update UptimeCalculator in push route * Fix: Handle resend in push route * Chore: Remove debug log --- .../2023-09-29-0000-heartbeat-retires.js | 15 +++ server/model/heartbeat.js | 15 +-- server/model/monitor.js | 15 ++- server/routers/api-router.js | 112 ++++++++++++++---- 4 files changed, 124 insertions(+), 33 deletions(-) create mode 100644 db/knex_migrations/2023-09-29-0000-heartbeat-retires.js diff --git a/db/knex_migrations/2023-09-29-0000-heartbeat-retires.js b/db/knex_migrations/2023-09-29-0000-heartbeat-retires.js new file mode 100644 index 00000000..a6b9c7bb --- /dev/null +++ b/db/knex_migrations/2023-09-29-0000-heartbeat-retires.js @@ -0,0 +1,15 @@ +exports.up = function (knex) { + // Add new column heartbeat.retries + return knex.schema + .alterTable("heartbeat", function (table) { + table.integer("retries").notNullable().defaultTo(0); + }); + +}; + +exports.down = function (knex) { + return knex.schema + .alterTable("heartbeat", function (table) { + table.dropColumn("retries"); + }); +}; diff --git a/server/model/heartbeat.js b/server/model/heartbeat.js index cc86ef63..9e972a37 100644 --- a/server/model/heartbeat.js +++ b/server/model/heartbeat.js @@ -29,13 +29,14 @@ class Heartbeat extends BeanModel { */ toJSON() { return { - monitorID: this.monitor_id, - status: this.status, - time: this.time, - msg: this.msg, - ping: this.ping, - important: this.important, - duration: this.duration, + monitorID: this._monitorId, + status: this._status, + time: this._time, + msg: this._msg, + ping: this._ping, + important: this._important, + duration: this._duration, + retries: this._retries, }; } diff --git a/server/model/monitor.js b/server/model/monitor.js index f2e14269..809580d6 100644 --- a/server/model/monitor.js +++ b/server/model/monitor.js @@ -361,6 +361,9 @@ class Monitor extends BeanModel { previousBeat = await R.findOne("heartbeat", " monitor_id = ? ORDER BY time DESC", [ this.id, ]); + if (previousBeat) { + retries = previousBeat.retries; + } } const isFirstBeat = !previousBeat; @@ -632,6 +635,7 @@ class Monitor extends BeanModel { // If the previous beat was down or pending we use the regular // beatInterval/retryInterval in the setTimeout further below if (previousBeat.status !== (this.isUpsideDown() ? DOWN : UP) || msSinceLastBeat > beatInterval * 1000 + bufferTime) { + bean.duration = Math.round(msSinceLastBeat / 1000); throw new Error("No heartbeat in the time window"); } else { let timeout = beatInterval * 1000 - msSinceLastBeat; @@ -647,6 +651,7 @@ class Monitor extends BeanModel { return; } } else { + bean.duration = beatInterval; throw new Error("No heartbeat in the time window"); } @@ -915,9 +920,14 @@ class Monitor extends BeanModel { } else if ((this.maxretries > 0) && (retries < this.maxretries)) { retries++; bean.status = PENDING; + } else { + // Continue counting retries during DOWN + retries++; } } + bean.retries = retries; + log.debug("monitor", `[${this.name}] Check isImportant`); let isImportant = Monitor.isImportantBeat(isFirstBeat, previousBeat?.status, bean.status); @@ -1437,10 +1447,7 @@ class Monitor extends BeanModel { * @returns {Promise>} Previous heartbeat */ static async getPreviousHeartbeat(monitorID) { - return await R.getRow(` - SELECT ping, status, time FROM heartbeat - WHERE id = (select MAX(id) from heartbeat where monitor_id = ?) - `, [ + return await R.findOne("heartbeat", " id = (select MAX(id) from heartbeat where monitor_id = ?)", [ monitorID ]); } diff --git a/server/routers/api-router.js b/server/routers/api-router.js index 0549518f..7b14a6da 100644 --- a/server/routers/api-router.js +++ b/server/routers/api-router.js @@ -64,38 +64,57 @@ router.get("/api/push/:pushToken", async (request, response) => { const previousHeartbeat = await Monitor.getPreviousHeartbeat(monitor.id); - if (monitor.isUpsideDown()) { - status = flipStatus(status); - } - let isFirstBeat = true; - let previousStatus = status; - let duration = 0; let bean = R.dispense("heartbeat"); bean.time = R.isoDateTimeMillis(dayjs.utc()); + bean.monitor_id = monitor.id; + bean.ping = ping; + bean.msg = msg; + bean.downCount = previousHeartbeat?.downCount || 0; if (previousHeartbeat) { isFirstBeat = false; - previousStatus = previousHeartbeat.status; - duration = dayjs(bean.time).diff(dayjs(previousHeartbeat.time), "second"); + bean.duration = dayjs(bean.time).diff(dayjs(previousHeartbeat.time), "second"); } if (await Monitor.isUnderMaintenance(monitor.id)) { msg = "Monitor under maintenance"; - status = MAINTENANCE; + bean.status = MAINTENANCE; + } else { + determineStatus(status, previousHeartbeat, monitor.maxretries, monitor.isUpsideDown(), bean); } + // Calculate uptime + let uptimeCalculator = await UptimeCalculator.getUptimeCalculator(monitor.id); + let endTimeDayjs = await uptimeCalculator.update(bean.status, parseFloat(bean.ping)); + bean.end_time = R.isoDateTimeMillis(endTimeDayjs); + log.debug("router", `/api/push/ called at ${dayjs().format("YYYY-MM-DD HH:mm:ss.SSS")}`); - log.debug("router", "PreviousStatus: " + previousStatus); - log.debug("router", "Current Status: " + status); + log.debug("router", "PreviousStatus: " + previousHeartbeat?.status); + log.debug("router", "Current Status: " + bean.status); - bean.important = Monitor.isImportantBeat(isFirstBeat, previousStatus, status); - bean.monitor_id = monitor.id; - bean.status = status; - bean.msg = msg; - bean.ping = ping; - bean.duration = duration; + bean.important = Monitor.isImportantBeat(isFirstBeat, previousHeartbeat?.status, status); + + if (Monitor.isImportantForNotification(isFirstBeat, previousHeartbeat?.status, status)) { + // Reset down count + bean.downCount = 0; + + log.debug("monitor", `[${this.name}] sendNotification`); + await Monitor.sendNotification(isFirstBeat, monitor, bean); + } else { + if (bean.status === DOWN && this.resendInterval > 0) { + ++bean.downCount; + if (bean.downCount >= this.resendInterval) { + // Send notification again, because we are still DOWN + log.debug("monitor", `[${this.name}] sendNotification again: Down Count: ${bean.downCount} | Resend Interval: ${this.resendInterval}`); + await Monitor.sendNotification(isFirstBeat, this, bean); + + // Reset down count + bean.downCount = 0; + } + } + } await R.store(bean); @@ -107,11 +126,6 @@ router.get("/api/push/:pushToken", async (request, response) => { response.json({ ok: true, }); - - if (Monitor.isImportantForNotification(isFirstBeat, previousStatus, status)) { - await Monitor.sendNotification(isFirstBeat, monitor, bean); - } - } catch (e) { response.status(404).json({ ok: false, @@ -562,4 +576,58 @@ router.get("/api/badge/:id/response", cache("5 minutes"), async (request, respon } }); +/** + * Determines the status of the next beat in the push route handling. + * @param {string} status - The reported new status. + * @param {object} previousHeartbeat - The previous heartbeat object. + * @param {number} maxretries - The maximum number of retries allowed. + * @param {boolean} isUpsideDown - Indicates if the monitor is upside down. + * @param {object} bean - The new heartbeat object. + * @returns {void} + */ +function determineStatus(status, previousHeartbeat, maxretries, isUpsideDown, bean) { + if (isUpsideDown) { + status = flipStatus(status); + } + + if (previousHeartbeat) { + if (previousHeartbeat.status === UP && status === DOWN) { + // Going Down + if ((maxretries > 0) && (previousHeartbeat.retries < maxretries)) { + // Retries available + bean.retries = previousHeartbeat.retries + 1; + bean.status = PENDING; + } else { + // No more retries + bean.retries = 0; + bean.status = DOWN; + } + } else if (previousHeartbeat.status === PENDING && status === DOWN && previousHeartbeat.retries < maxretries) { + // Retries available + bean.retries = previousHeartbeat.retries + 1; + bean.status = PENDING; + } else { + // No more retries or not pending + if (status === DOWN) { + bean.retries = previousHeartbeat.retries + 1; + bean.status = status; + } else { + bean.retries = 0; + bean.status = status; + } + } + } else { + // First beat? + if (status === DOWN && maxretries > 0) { + // Retries available + bean.retries = 1; + bean.status = PENDING; + } else { + // Retires not enabled + bean.retries = 0; + bean.status = status; + } + } +} + module.exports = router;