Skip to content

Commit 6989272

Browse files
authored
Merge pull request #80 from Sysone-Final/feature/SYSONE-73-monitoring
Feature/sysone 73 monitoring
2 parents ec8070a + e3b0643 commit 6989272

4 files changed

Lines changed: 718 additions & 451 deletions

File tree

src/main/java/org/example/finalbe/domains/alert/service/AlertEvaluationService.java

Lines changed: 81 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.example.finalbe.domains.rack.repository.RackRepository;
2424
import org.example.finalbe.domains.serverroom.domain.ServerRoom;
2525
import org.example.finalbe.domains.serverroom.repository.ServerRoomRepository;
26+
import org.springframework.dao.DataIntegrityViolationException;
2627
import org.springframework.scheduling.annotation.Async;
2728
import org.springframework.stereotype.Service;
2829
import org.springframework.transaction.annotation.Transactional;
@@ -185,7 +186,7 @@ public void evaluateNetworkMetric(NetworkMetric metric) {
185186
// 에러율 평가 (RX)
186187
if (metric.getInErrorPktsTot() != null && metric.getInPktsTot() != null &&
187188
metric.getInPktsTot() > 0) {
188-
double errorRate = (metric.getInErrorPktsTot() * 100.0) / metric.getInPktsTot();
189+
double errorRate = (metric.getInErrorPktsTot().doubleValue() / metric.getInPktsTot().doubleValue()) * 100;
189190
evaluateNetworkErrorRate(
190191
equipment,
191192
"rx_error_rate",
@@ -199,7 +200,7 @@ public void evaluateNetworkMetric(NetworkMetric metric) {
199200
// 에러율 평가 (TX)
200201
if (metric.getOutErrorPktsTot() != null && metric.getOutPktsTot() != null &&
201202
metric.getOutPktsTot() > 0) {
202-
double errorRate = (metric.getOutErrorPktsTot() * 100.0) / metric.getOutPktsTot();
203+
double errorRate = (metric.getOutErrorPktsTot().doubleValue() / metric.getOutPktsTot().doubleValue()) * 100;
203204
evaluateNetworkErrorRate(
204205
equipment,
205206
"tx_error_rate",
@@ -213,7 +214,7 @@ public void evaluateNetworkMetric(NetworkMetric metric) {
213214
// 드롭율 평가 (RX)
214215
if (metric.getInDiscardPktsTot() != null && metric.getInPktsTot() != null &&
215216
metric.getInPktsTot() > 0) {
216-
double dropRate = (metric.getInDiscardPktsTot() * 100.0) / metric.getInPktsTot();
217+
double dropRate = (metric.getInDiscardPktsTot().doubleValue() / metric.getInPktsTot().doubleValue()) * 100;
217218
evaluateNetworkDropRate(
218219
equipment,
219220
"rx_drop_rate",
@@ -227,7 +228,7 @@ public void evaluateNetworkMetric(NetworkMetric metric) {
227228
// 드롭율 평가 (TX)
228229
if (metric.getOutDiscardPktsTot() != null && metric.getOutPktsTot() != null &&
229230
metric.getOutPktsTot() > 0) {
230-
double dropRate = (metric.getOutDiscardPktsTot() * 100.0) / metric.getOutPktsTot();
231+
double dropRate = (metric.getOutDiscardPktsTot().doubleValue() / metric.getOutPktsTot().doubleValue()) * 100;
231232
evaluateNetworkDropRate(
232233
equipment,
233234
"tx_drop_rate",
@@ -249,12 +250,15 @@ public void evaluateNetworkMetric(NetworkMetric metric) {
249250
*/
250251
private void evaluateNetworkUsage(
251252
Equipment equipment,
252-
String metricName,
253+
String baseMetricName,
253254
Double usage,
254255
String nicName,
255256
LocalDateTime generateTime,
256257
AlertSettingsDto settings) {
257258

259+
// ✅ NIC 정보를 포함한 고유한 metricName 생성
260+
String metricName = baseMetricName + "_" + nicName;
261+
258262
// 임계값이 설정되지 않은 경우 기본값 사용 (80% 경고, 90% 위험)
259263
Double warningThreshold = 80.0;
260264
Double criticalThreshold = 90.0;
@@ -277,12 +281,15 @@ private void evaluateNetworkUsage(
277281
*/
278282
private void evaluateNetworkErrorRate(
279283
Equipment equipment,
280-
String metricName,
284+
String baseMetricName,
281285
Double errorRate,
282286
String nicName,
283287
LocalDateTime generateTime,
284288
AlertSettingsDto settings) {
285289

290+
// ✅ NIC 정보를 포함한 고유한 metricName 생성
291+
String metricName = baseMetricName + "_" + nicName;
292+
286293
evaluateMetric(
287294
TargetType.EQUIPMENT,
288295
equipment.getId(),
@@ -301,12 +308,15 @@ private void evaluateNetworkErrorRate(
301308
*/
302309
private void evaluateNetworkDropRate(
303310
Equipment equipment,
304-
String metricName,
311+
String baseMetricName,
305312
Double dropRate,
306313
String nicName,
307314
LocalDateTime generateTime,
308315
AlertSettingsDto settings) {
309316

317+
// ✅ NIC 정보를 포함한 고유한 metricName 생성
318+
String metricName = baseMetricName + "_" + nicName;
319+
310320
evaluateMetric(
311321
TargetType.EQUIPMENT,
312322
equipment.getId(),
@@ -320,6 +330,66 @@ private void evaluateNetworkDropRate(
320330
);
321331
}
322332

333+
/**
334+
* Tracker 조회 또는 생성 (동시성 처리 추가)
335+
*/
336+
private AlertViolationTracker getOrCreateTracker(
337+
TargetType targetType, Long targetId,
338+
MetricType metricType, String metricName) {
339+
340+
Optional<AlertViolationTracker> existing = switch (targetType) {
341+
case EQUIPMENT -> violationTrackerRepository.findByEquipmentIdAndMetric(
342+
targetId, metricType, metricName);
343+
case RACK -> violationTrackerRepository.findByRackIdAndMetric(
344+
targetId, metricType, metricName);
345+
case SERVER_ROOM -> violationTrackerRepository.findByServerRoomIdAndMetric(
346+
targetId, metricType, metricName);
347+
case DATA_CENTER -> violationTrackerRepository.findByDataCenterIdAndMetric(
348+
targetId, metricType, metricName);
349+
};
350+
351+
return existing.orElseGet(() -> {
352+
try {
353+
AlertViolationTracker newTracker = AlertViolationTracker.builder()
354+
.targetType(targetType)
355+
.metricType(metricType)
356+
.metricName(metricName)
357+
.consecutiveViolations(0)
358+
.lastViolationTime(LocalDateTime.now())
359+
.build();
360+
361+
switch (targetType) {
362+
case EQUIPMENT -> newTracker.setEquipmentId(targetId);
363+
case RACK -> newTracker.setRackId(targetId);
364+
case SERVER_ROOM -> newTracker.setServerRoomId(targetId);
365+
case DATA_CENTER -> newTracker.setDataCenterId(targetId);
366+
}
367+
368+
return violationTrackerRepository.save(newTracker);
369+
370+
} catch (DataIntegrityViolationException e) {
371+
// ✅ 동시에 생성된 경우 다시 조회
372+
log.warn("⚠️ Tracker 중복 생성 감지, 재조회: targetType={}, targetId={}, metric={}",
373+
targetType, targetId, metricName);
374+
375+
return switch (targetType) {
376+
case EQUIPMENT -> violationTrackerRepository
377+
.findByEquipmentIdAndMetric(targetId, metricType, metricName)
378+
.orElseThrow(() -> new IllegalStateException("Tracker 재조회 실패"));
379+
case RACK -> violationTrackerRepository
380+
.findByRackIdAndMetric(targetId, metricType, metricName)
381+
.orElseThrow(() -> new IllegalStateException("Tracker 재조회 실패"));
382+
case SERVER_ROOM -> violationTrackerRepository
383+
.findByServerRoomIdAndMetric(targetId, metricType, metricName)
384+
.orElseThrow(() -> new IllegalStateException("Tracker 재조회 실패"));
385+
case DATA_CENTER -> violationTrackerRepository
386+
.findByDataCenterIdAndMetric(targetId, metricType, metricName)
387+
.orElseThrow(() -> new IllegalStateException("Tracker 재조회 실패"));
388+
};
389+
}
390+
});
391+
}
392+
323393
/**
324394
* Environment 메트릭 평가 (Rack)
325395
*/
@@ -629,40 +699,6 @@ private void resolveActiveAlerts(
629699
}
630700
}
631701

632-
private AlertViolationTracker getOrCreateTracker(
633-
TargetType targetType, Long targetId,
634-
MetricType metricType, String metricName) {
635-
636-
Optional<AlertViolationTracker> existing = switch (targetType) {
637-
case EQUIPMENT -> violationTrackerRepository.findByEquipmentIdAndMetric(
638-
targetId, metricType, metricName);
639-
case RACK -> violationTrackerRepository.findByRackIdAndMetric(
640-
targetId, metricType, metricName);
641-
case SERVER_ROOM -> violationTrackerRepository.findByServerRoomIdAndMetric(
642-
targetId, metricType, metricName);
643-
case DATA_CENTER -> violationTrackerRepository.findByDataCenterIdAndMetric(
644-
targetId, metricType, metricName);
645-
};
646-
647-
return existing.orElseGet(() -> {
648-
AlertViolationTracker newTracker = AlertViolationTracker.builder()
649-
.targetType(targetType)
650-
.metricType(metricType)
651-
.metricName(metricName)
652-
.consecutiveViolations(0)
653-
.lastViolationTime(LocalDateTime.now())
654-
.build();
655-
656-
switch (targetType) {
657-
case EQUIPMENT -> newTracker.setEquipmentId(targetId);
658-
case RACK -> newTracker.setRackId(targetId);
659-
case SERVER_ROOM -> newTracker.setServerRoomId(targetId);
660-
case DATA_CENTER -> newTracker.setDataCenterId(targetId);
661-
}
662-
663-
return violationTrackerRepository.save(newTracker);
664-
});
665-
}
666702

667703
private String buildAlertMessage(
668704
TargetType targetType, String targetName,
@@ -688,15 +724,16 @@ private AlertSettingsDto getAlertSettings() {
688724
.map(AlertSettingsDto::from)
689725
.orElseGet(AlertSettingsDto::getDefault);
690726
}
727+
691728
/**
692729
* 계층 구조에 따라 Alert의 ID들을 자동으로 채움
693730
* Equipment < Rack < ServerRoom < DataCenter
694-
*
731+
* <p>
695732
* ✅ Fetch Join을 사용하여 LazyInitializationException 방지
696733
*
697-
* @param alert AlertHistory 엔티티
734+
* @param alert AlertHistory 엔티티
698735
* @param targetType 대상 타입
699-
* @param targetId 대상 ID
736+
* @param targetId 대상 ID
700737
*/
701738
private void populateHierarchyIds(AlertHistory alert, TargetType targetType, Long targetId) {
702739
switch (targetType) {

src/main/java/org/example/finalbe/domains/equipment/service/EquipmentService.java

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ public List<EquipmentListResponse> searchEquipments(String keyword, String type,
196196

197197
/**
198198
* 장비 생성
199+
* ✅ 수정: 랙에 배치된 경우에만 Simulator에 등록
199200
*/
200201
@Transactional
201202
public EquipmentDetailResponse createEquipment(EquipmentCreateRequest request) {
@@ -236,17 +237,28 @@ public EquipmentDetailResponse createEquipment(EquipmentCreateRequest request) {
236237

237238
Equipment equipment = request.toEntity(rack);
238239

239-
// 회사 ID 자동 설정
240+
// 회사 ID 자동 설정
240241
equipment.setCompanyId(currentMember.getCompany().getId());
241242

242243
Equipment savedEquipment = equipmentRepository.save(equipment);
243244

244-
// 랙이 있을 때만 placeEquipment 호출
245+
// ✅ 수정: 랙이 있을 때만 메트릭 수집 시작
245246
if (rack != null) {
246247
rack.placeEquipment(savedEquipment, request.startUnit(), request.unitSize());
247248

248-
// ✅ 추가: 랙에 배치되면 메트릭 수집 시작
249+
// Prometheus 매핑 추가
249250
equipmentMappingService.addEquipmentMapping(savedEquipment);
251+
252+
// ✅ 수정: Simulator 등록 (랙 배치 필수 - addEquipment 내부에서 체크됨)
253+
if (savedEquipment.getType() == EquipmentType.SERVER ||
254+
savedEquipment.getType() == EquipmentType.STORAGE) {
255+
try {
256+
serverRoomDataSimulator.addEquipment(savedEquipment);
257+
} catch (Exception e) {
258+
log.error("⚠️ 시뮬레이터 등록 실패: {}", e.getMessage());
259+
}
260+
}
261+
250262
log.info("✅ 장비가 랙에 배치되어 메트릭 수집이 시작됩니다. (Equipment ID: {}, Rack ID: {})",
251263
savedEquipment.getId(), rack.getId());
252264
} else {
@@ -256,22 +268,14 @@ public EquipmentDetailResponse createEquipment(EquipmentCreateRequest request) {
256268

257269
equipmentHistoryRecorder.recordCreate(savedEquipment, currentMember);
258270

259-
// ========== 시뮬레이터 등록 ==========
260-
if (savedEquipment.getType() == EquipmentType.SERVER || savedEquipment.getType() == EquipmentType.STORAGE) {
261-
try {
262-
serverRoomDataSimulator.addEquipment(savedEquipment);
263-
} catch (Exception e) {
264-
log.error("⚠️ 시뮬레이터 등록 실패 (모니터링 데이터 생성 안됨): {}", e.getMessage());
265-
}
266-
}
267-
268271
log.info("Equipment created successfully with id: {} for company: {}",
269272
savedEquipment.getId(), savedEquipment.getCompanyId());
270273
return EquipmentDetailResponse.from(savedEquipment);
271274
}
272275

273276
/**
274277
* 장비 수정
278+
* ✅ 수정: 랙 변경 시 Simulator 등록/제거 처리
275279
*/
276280
@Transactional
277281
public EquipmentDetailResponse updateEquipment(Long id, EquipmentUpdateRequest request) {
@@ -303,7 +307,7 @@ public EquipmentDetailResponse updateEquipment(Long id, EquipmentUpdateRequest r
303307
// === 수정 전 상태 저장 ===
304308
Equipment oldEquipment = cloneEquipment(equipment);
305309

306-
// ✅ 추가: 랙 변경 감지
310+
// ✅ 랙 변경 감지
307311
Long oldRackId = equipment.getRack() != null ? equipment.getRack().getId() : null;
308312

309313
// === 상태 변경 감지 ===
@@ -347,17 +351,35 @@ public EquipmentDetailResponse updateEquipment(Long id, EquipmentUpdateRequest r
347351

348352
Equipment updatedEquipment = equipmentRepository.save(equipment);
349353

350-
// ✅ 추가: 랙 변경 확인 및 메트릭 수집 매핑 업데이트
354+
// ✅ 수정: 랙 변경 확인 및 메트릭 수집 매핑 업데이트
351355
Long newRackId = updatedEquipment.getRack() != null ? updatedEquipment.getRack().getId() : null;
352356
boolean rackChanged = !Objects.equals(oldRackId, newRackId);
353357

354358
if (rackChanged) {
359+
// Prometheus 매핑 업데이트
355360
equipmentMappingService.updateEquipmentMapping(updatedEquipment);
356-
if (newRackId != null) {
357-
log.info("✅ 장비가 랙에 배치되어 메트릭 수집 시작 (Equipment ID: {}, Rack ID: {})",
358-
id, newRackId);
359-
} else {
360-
log.info("⊘ 장비가 랙에서 제거되어 메트릭 수집 중단 (Equipment ID: {})", id);
361+
362+
// ✅ 수정: Simulator 등록/제거
363+
EquipmentType type = updatedEquipment.getType();
364+
if (type == EquipmentType.SERVER || type == EquipmentType.STORAGE) {
365+
if (newRackId != null) {
366+
// 랙에 배치됨 → Simulator에 추가
367+
try {
368+
serverRoomDataSimulator.addEquipment(updatedEquipment);
369+
log.info("✅ 장비가 랙에 배치되어 메트릭 수집 시작 (Equipment ID: {}, Rack ID: {})",
370+
id, newRackId);
371+
} catch (Exception e) {
372+
log.error("⚠️ 시뮬레이터 등록 실패: {}", e.getMessage());
373+
}
374+
} else {
375+
// 랙에서 제거됨 → Simulator에서 제거
376+
try {
377+
serverRoomDataSimulator.removeEquipment(id);
378+
log.info("⊘ 장비가 랙에서 제거되어 메트릭 수집 중단 (Equipment ID: {})", id);
379+
} catch (Exception e) {
380+
log.error("⚠️ 시뮬레이터 제거 실패: {}", e.getMessage());
381+
}
382+
}
361383
}
362384
}
363385

0 commit comments

Comments
 (0)