Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions domains/anomaly-detection/anomalyDetectionPython.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,23 @@ anomaly_detection_labels() {

local language
language=$( extractQueryParameter "projection_language" "${@}" )

echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Labelling ${language} ${nodeLabel} anomalies..."

# Within the absolute (full) report directory for anomaly detection, create a subdirectory for every detailed type (Java_Package, Java_Type, ...)
local detail_report_directory="${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}"
mkdir -p "${detail_report_directory}"

execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeRemoveLabels.cypher" "${@}"
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeAuthority.cypher" "${@}"
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBottleneck.cypher" "${@}"
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeHub.cypher" "${@}"
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBridge.cypher" "${@}"
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeOutlier.cypher" "${@}"
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeAuthority.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopAuthority.csv"
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBottleneck.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopBottleneck.csv"
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeHub.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopHub.csv"
# The following two label types require Python scripts to run first.
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBridge.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopBridge.csv"
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeOutlier.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopOutlier.csv"
# Output the top anomalies and their archetype + rank
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionTopAnomalies.cypher" "${@}" > "${detail_report_directory}/TopAnomalies.csv"

}

# Run the anomaly detection pipeline.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// List top anomalies: the (at most) 50 highest scored anomalous code units
// together with their project name, archetype ranks and top features.
//
// Parameters:
//   $projection_node_label - label a node needs to have to be considered (checked against labels(codeUnit)).
//
// Returned columns: projectName, shortCodeUnitName, codeUnitName, anomalyRank, anomalyScore,
// authorityRank, bottleneckRank, bridgeRank, hubRank, outlierRank (each 0 when the property is missing),
// topFeature1..3 and topFeature1Score..topFeature3Score.

MATCH (codeUnit)
// Only nodes with the requested label that are marked as anomaly (label = 1) with a positive score.
WHERE $projection_node_label IN labels(codeUnit)
AND codeUnit.anomalyScore > 0
AND codeUnit.anomalyLabel = 1
// Highest score first; the limit is applied before the optional lookups below.
// NOTE(review): ORDER BY/LIMIT directly follow MATCH ... WHERE without a WITH - confirm the Cypher version in use supports this form.
ORDER BY codeUnit.anomalyScore DESC
LIMIT 50
// Optionally resolve the Java artifact that contains the code unit (null if there is none).
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
WITH *, artifact.name AS artifactName
// Optionally resolve the Typescript project that contains the code unit and its root directory.
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
// The project name is the last path segment of the root directory's absolute file name.
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
// Prefer the Java artifact name and fall back to the Typescript project name.
// NOTE(review): this re-binds projectName, which "*" already carries - confirm the target Neo4j version accepts that.
WITH *, coalesce(artifactName, projectName) AS projectName
RETURN projectName
,codeUnit.name AS shortCodeUnitName
// First non-null of the listed name properties, from fqn down to the plain name.
,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
,codeUnit.anomalyRank AS anomalyRank
,codeUnit.anomalyScore AS anomalyScore
// Archetype ranks default to 0 when the corresponding property is not set on the node.
,coalesce(codeUnit.anomalyAuthorityRank, 0) AS authorityRank
,coalesce(codeUnit.anomalyBottleneckRank, 0) AS bottleneckRank
,coalesce(codeUnit.anomalyBridgeRank, 0) AS bridgeRank
,coalesce(codeUnit.anomalyHubRank, 0) AS hubRank
,coalesce(codeUnit.anomalyOutlierRank, 0) AS outlierRank
// Top three features and the values of their anomalyTopFeatureSHAPValue properties.
,codeUnit.anomalyTopFeature1 AS topFeature1
,codeUnit.anomalyTopFeature2 AS topFeature2
,codeUnit.anomalyTopFeature3 AS topFeature3
,codeUnit.anomalyTopFeatureSHAPValue1 AS topFeature1Score
,codeUnit.anomalyTopFeatureSHAPValue2 AS topFeature2Score
,codeUnit.anomalyTopFeatureSHAPValue3 AS topFeature3Score
3 changes: 3 additions & 0 deletions domains/anomaly-detection/tunedAnomalyDetectionExplained.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ def add_anomaly_detection_results_to_features(
anomaly_detection_results: AnomalyDetectionResults,
anomaly_label_column: str = 'anomalyLabel',
anomaly_score_column: str = 'anomalyScore',
anomaly_rank_column: str = 'anomalyRank'
) -> pd.DataFrame:
"""
Adds anomaly detection results to the feature and returns the updated dataframe.
Expand All @@ -549,6 +550,7 @@ def add_anomaly_detection_results_to_features(
# Add anomaly labels and scores to the feature matrix
features[anomaly_label_column] = anomaly_detection_results.anomaly_labels
features[anomaly_score_column] = anomaly_detection_results.anomaly_scores
features[anomaly_rank_column] = features[anomaly_score_column].rank(method='dense', ascending=False).astype(int)
return features


Expand Down Expand Up @@ -1250,6 +1252,7 @@ def output_top_shap_explained_global_features_as_markdown_table(
'nodeElementId': features["nodeElementId"],
'anomalyLabel': features['anomalyLabel'].astype(int),
'anomalyScore': features['anomalyScore'],
'anomalyRank': features['anomalyRank'],
'anomalyTopFeature1': features['anomalyTopFeature_1'],
'anomalyTopFeature2': features['anomalyTopFeature_2'],
'anomalyTopFeature3': features['anomalyTopFeature_3'],
Expand Down
Loading