Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -295,13 +295,18 @@
query += ` WHERE ${whereConditions.join(' AND ')} ALLOW FILTERING`;
}

const countQuery =
`SELECT COUNT(*) as count FROM ${tableName.toLowerCase()}` +
(whereConditions.length > 0 ? ` WHERE ${whereConditions.join(' AND ')} ALLOW FILTERING` : '');
const countResult = await client.execute(countQuery, params, { prepare: true });
const totalCount = parseInt(countResult.rows[0].count?.toString() || '0', 10);

const result = await client.execute(query, params, { prepare: true });
const {
totalCount,
isEstimated,
result: preExecutedResult,
} = await this.getRowCountWithFallback(client, tableName, whereConditions, params, query);

let result: cassandra.types.ResultSet;
if (preExecutedResult) {
result = preExecutedResult;
} else {
result = await client.execute(query, params, { prepare: true });
}
const startIndex = (page - 1) * perPage;

const allRows = [...result.rows];
Expand Down Expand Up @@ -336,7 +341,7 @@
return {
data: rows,
pagination,
large_dataset: totalCount > DAO_CONSTANTS.LARGE_DATASET_ROW_LIMIT,
large_dataset: totalCount > DAO_CONSTANTS.LARGE_DATASET_ROW_LIMIT || isEstimated,
};
} catch (error) {
throw new Error(`Failed to get rows from table: ${error.message}`);
Expand Down Expand Up @@ -700,6 +705,83 @@
}
}

private isAWSConnection(): boolean {
const { host } = this.connection;

if (host.includes('cassandra') && host.includes('amazonaws.com')) {

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High

'amazonaws.com' can be anywhere in the URL, and arbitrary hosts may come before or after it.

Copilot Autofix

AI 11 months ago

To fix the issue, we need to parse the host value and validate it against a whitelist of allowed AWS domains or patterns. Instead of using host.includes('amazonaws.com'), we should use a stricter check that ensures the host ends with .amazonaws.com and does not contain any unexpected subdomains. This can be achieved using a combination of URL parsing and regular expressions.

The fix involves:

  1. Parsing the host value to ensure it is a valid domain.
  2. Checking that the host ends with .amazonaws.com and matches expected patterns (e.g., *.amazonaws.com or ec2-*.compute.amazonaws.com).
  3. Replacing the substring checks with these stricter validations.

Suggested changeset 1
shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts b/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts
--- a/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts
+++ b/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts
@@ -710,13 +710,9 @@
 
-    if (host.includes('cassandra') && host.includes('amazonaws.com')) {
-      return true;
-    }
-
-    if (host.includes('amazonaws.com')) {
-      return true;
-    }
-
+    const awsHostRegex = /^([a-zA-Z0-9-]+\.)*amazonaws\.com$/i;
     const ec2HostRegex = /^(ec2-).*([.]compute[.]amazonaws[.]com)$/i;
-    if (ec2HostRegex.test(host)) {
-      return true;
+
+    if (awsHostRegex.test(host)) {
+      if (host.includes('cassandra') || ec2HostRegex.test(host)) {
+        return true;
+      }
     }
EOF
@@ -710,13 +710,9 @@

if (host.includes('cassandra') && host.includes('amazonaws.com')) {
return true;
}

if (host.includes('amazonaws.com')) {
return true;
}

const awsHostRegex = /^([a-zA-Z0-9-]+\.)*amazonaws\.com$/i;
const ec2HostRegex = /^(ec2-).*([.]compute[.]amazonaws[.]com)$/i;
if (ec2HostRegex.test(host)) {
return true;

if (awsHostRegex.test(host)) {
if (host.includes('cassandra') || ec2HostRegex.test(host)) {
return true;
}
}
Copilot is powered by AI and may make mistakes. Always verify output.
return true;
}

if (host.includes('amazonaws.com')) {

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High

'amazonaws.com' can be anywhere in the URL, and arbitrary hosts may come before or after it.

Copilot Autofix

AI 11 months ago

To fix the problem, we should parse the host string and check if it matches exactly amazonaws.com or is a subdomain of amazonaws.com (e.g., cassandra.amazonaws.com, ec2-xx-xx-xx-xx.compute.amazonaws.com). This can be done by splitting the host into its labels and checking if the last two labels are amazonaws.com, or by using a regular expression that matches only valid AWS hostnames. The fix should be applied in the isAWSConnection method, replacing the substring check with a more robust check. No new dependencies are required, as this can be done with standard string or regex operations.


Suggested changeset 1
shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts b/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts
--- a/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts
+++ b/shared-code/src/data-access-layer/data-access-objects/data-access-object-cassandra.ts
@@ -710,3 +710,7 @@
 
-    if (host.includes('cassandra') && host.includes('amazonaws.com')) {
+    // Check for AWS Cassandra service host
+    if (
+      host === 'cassandra.amazonaws.com' ||
+      host.endsWith('.cassandra.amazonaws.com')
+    ) {
       return true;
@@ -714,3 +718,7 @@
 
-    if (host.includes('amazonaws.com')) {
+    // Check for any subdomain of amazonaws.com (e.g., ec2-xx-xx-xx-xx.compute.amazonaws.com)
+    if (
+      host === 'amazonaws.com' ||
+      host.endsWith('.amazonaws.com')
+    ) {
       return true;
EOF
@@ -710,3 +710,7 @@

if (host.includes('cassandra') && host.includes('amazonaws.com')) {
// Check for AWS Cassandra service host
if (
host === 'cassandra.amazonaws.com' ||
host.endsWith('.cassandra.amazonaws.com')
) {
return true;
@@ -714,3 +718,7 @@

if (host.includes('amazonaws.com')) {
// Check for any subdomain of amazonaws.com (e.g., ec2-xx-xx-xx-xx.compute.amazonaws.com)
if (
host === 'amazonaws.com' ||
host.endsWith('.amazonaws.com')
) {
return true;
Copilot is powered by AI and may make mistakes. Always verify output.
return true;
}

const ec2HostRegex = /^(ec2-).*([.]compute[.]amazonaws[.]com)$/i;
if (ec2HostRegex.test(host)) {
return true;
}

return false;
}

/**
 * Resolves the total row count for a table query, preferring an exact
 * `COUNT(*)` and degrading to an estimate when that is not possible.
 *
 * When `isAWSConnection()` reports an AWS host, the exact count is skipped
 * and the estimate is used directly.
 * NOTE(review): presumably because COUNT(*) is unsupported or too expensive
 * on the AWS-managed service — confirm.
 *
 * @param client - Connected Cassandra client used to run the queries.
 * @param tableName - Table to count; lower-cased before being placed in the CQL.
 * @param whereConditions - CQL predicates joined with `AND`; may be empty.
 * @param params - Bind values matching the placeholders in `whereConditions`.
 * @param query - The full data query, forwarded to `estimateRowCount`.
 * @returns The count, whether it is an estimate, and — only when estimation
 *          ran — whatever `result` value `estimateRowCount` produced.
 */
private async getRowCountWithFallback(
  client: cassandra.Client,
  tableName: string,
  whereConditions: string[],
  params: any[],
  query: string,
): Promise<{ totalCount: number; isEstimated: boolean; result?: cassandra.types.ResultSet }> {
  // AWS hosts never attempt the exact count.
  if (this.isAWSConnection()) {
    return this.estimateRowCount(client, tableName, whereConditions, params, query);
  }

  try {
    let countQuery = `SELECT COUNT(*) as count FROM ${tableName.toLowerCase()}`;
    if (whereConditions.length > 0) {
      countQuery += ` WHERE ${whereConditions.join(' AND ')} ALLOW FILTERING`;
    }

    const { rows } = await client.execute(countQuery, params, { prepare: true });
    // A missing count value is treated as zero.
    const rawCount = rows[0].count?.toString() || '0';
    return { totalCount: parseInt(rawCount, 10), isEstimated: false };
  } catch (countError) {
    // Any failure of the exact count (including an empty row set) falls back to estimation.
    console.warn(`COUNT query failed, falling back to estimation: ${countError.message}`);
    return this.estimateRowCount(client, tableName, whereConditions, params, query);
  }
}

/**
 * Estimates the number of rows matching a query without issuing `COUNT(*)`.
 *
 * The data query itself is executed and its returned row count is the
 * baseline. When there are no WHERE conditions, an additional sample of up
 * to `sampleLimit` rows is read from the table; if the sample is cut short
 * (limit reached, or the driver reports a further page) the sampled size is
 * doubled as a rough "at least this many" figure.
 *
 * @param client - Connected Cassandra client used to run the queries.
 * @param tableName - Table to sample; lower-cased before being placed in the CQL.
 * @param whereConditions - Predicates of the data query; sampling only runs when empty.
 * @param params - Bind values for `query`.
 * @param query - The data query whose result is reused by the caller.
 * @returns The estimated count (always flagged `isEstimated: true`) and the
 *          already-executed result of `query`. If executing `query` fails,
 *          `totalCount` is a fixed placeholder and `result` is omitted so the
 *          caller re-runs the query itself.
 */
private async estimateRowCount(
  client: cassandra.Client,
  tableName: string,
  whereConditions: string[],
  params: any[],
  query: string,
): Promise<{ totalCount: number; isEstimated: boolean; result?: cassandra.types.ResultSet }> {
  const sampleLimit = 10000;
  const fallbackCount = 100;
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  try {
    const result = await client.execute(query, params, { prepare: true });
    let estimatedCount = result.rows.length;

    if (whereConditions.length === 0) {
      try {
        const sampleQuery = `SELECT * FROM ${tableName.toLowerCase()} LIMIT ${sampleLimit}`;
        // The driver returns one page per execute(); ask for a page as large as
        // the LIMIT, otherwise the default page size caps the sample below it.
        const sampleResult = await client.execute(sampleQuery, [], {
          prepare: true,
          fetchSize: sampleLimit,
        });
        const sampledRows = sampleResult.rows.length;
        // A leftover paging state means the server had more rows than it returned.
        const tableExceedsSample = sampledRows >= sampleLimit || Boolean(sampleResult.pageState);

        estimatedCount = Math.max(estimatedCount, tableExceedsSample ? sampledRows * 2 : sampledRows);
      } catch (sampleError) {
        // Sampling is best-effort; keep the baseline from the data query.
        console.warn(`Row count sampling failed: ${describeError(sampleError)}`);
      }
    }

    return { totalCount: estimatedCount, isEstimated: true, result };
  } catch (error) {
    console.warn(`Row count estimation failed: ${describeError(error)}`);
    // No usable result: omit it instead of returning null against a non-null type.
    return { totalCount: fallbackCount, isEstimated: true };
  }
}

private async getCassandraClient(): Promise<cassandra.Client> {
const cachedClient = LRUStorage.getCassandraClientCache(this.connection);

Expand Down
Loading