From e1189a46579ddfb77a30b3e0ef1c7bce61bd2f27 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:05:28 +0000 Subject: [PATCH 1/2] Initial plan From 31ff308306181629518fef65913801f5d436a78b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:07:11 +0000 Subject: [PATCH 2/2] Remove trailing whitespace from copyrightcheck.py Agent-Logs-Url: https://github.com/marklogic/pr-workflows/sessions/8161bf32-06e4-47c5-b637-c6c6e1ca0037 Co-authored-by: brijeshp56 <203762578+brijeshp56@users.noreply.github.com> --- scripts/copyrightcheck.py | 140 +++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 261be9c..863be3f 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -17,10 +17,10 @@ class CopyrightValidator: """Validates copyright headers in source files.""" - + # Common patterns for comment block terminators TRAILING_COMMENT_TERMINATORS = r'(\*/|-->|:\))\s*$' - + def __init__(self, config_file: str): """Initialize validator with configuration file.""" self.config = self._load_config(config_file) @@ -29,38 +29,38 @@ def __init__(self, config_file: str): if self.start_year is None: print("Error: 'startyear' must be specified in the configuration file.") sys.exit(1) - + # Get excluded files from config, default to empty set if not specified excluded_files_list = self.config.get('filesexcluded') if excluded_files_list is None: self.excluded_files = set() else: self.excluded_files = set(excluded_files_list) - + def _load_config(self, config_file: str) -> Dict[str, Any]: """Load configuration from plain text file. - + Supports both single-line and multiline filesexcluded values: - + Single-line: filesexcluded: README.MD,.github/* - + Mixed (inline value + continuation lines): filesexcluded: README.MD .github/* src/scripts/example.py - + Multiline only (empty inline value): filesexcluded: .github/* src/scripts/example.py - + Continuation lines are collected until an empty line or a new key: is found. """ config = {} - + print(f"📋 Loading copyright config from: {config_file}") - + try: with open(config_file, 'r') as f: content = f.read() @@ -68,23 +68,23 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: for line_num, line in enumerate(content.split('\n'), 1): print(f" {line_num:2d}: {line}") print() - + # Reset file pointer to beginning f.seek(0) - + current_multiline_key = None for line_num, line in enumerate(f, 1): line = line.strip() - + # Empty line ends any active multi-line block if not line: current_multiline_key = None continue - + # Skip comments if line.startswith('#'): continue - + # Detect key:value pairs — key must be a simple word (no path chars) if ':' in line: key_part, value_part = line.split(':', 1) @@ -93,14 +93,14 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: current_multiline_key = None key = key_candidate value = value_part.strip() - + if key == 'startyear': try: config['startyear'] = int(value) except ValueError: print(f"Error: Invalid start year '{value}'. Must be a valid integer.") sys.exit(1) - + elif key == 'filesexcluded': # Always initialise the list and activate multiline mode. # This supports: @@ -110,65 +110,65 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: config['filesexcluded'] = files current_multiline_key = 'filesexcluded' continue - + # Continuation line for an active multi-line key # Each line may contain one or more comma-separated entries if current_multiline_key == 'filesexcluded': entries = [e.strip() for e in line.split(',')] config['filesexcluded'].extend([e for e in entries if e]) - + print("✅ Parsed configuration:") for key, value in config.items(): print(f" {key}: {value}") print() - + return config - + except FileNotFoundError: print(f"Error: Configuration file '{config_file}' not found.") sys.exit(1) except Exception as e: print(f"Error reading configuration file: {e}") sys.exit(1) - + def _is_excluded(self, relative_path: str) -> bool: """Check if file should be excluded from copyright validation. - + Args: relative_path: File path relative to repository root """ relative_path = os.path.normpath(relative_path) - + # Always exclude dotfiles (files starting with .) filename = os.path.basename(relative_path) - if filename.startswith('.'): + if filename.startswith('.'): print(f"🚫 Excluding dotfile: {relative_path}") return True - + for excluded_pattern in self.excluded_files: excluded_pattern = os.path.normpath(excluded_pattern) - + # Check for exact match if relative_path == excluded_pattern: print(f"🚫 Excluding (exact match): {relative_path} matches {excluded_pattern}") return True - + # Check for pattern match (simple glob-like matching) if '*' in excluded_pattern: pattern = excluded_pattern.replace('*', '.*') if re.match(pattern, relative_path): print(f"🚫 Excluding (pattern match): {relative_path} matches {excluded_pattern}") return True - + print(f"✅ Including: {relative_path}") return False - - + + def _get_expected_copyright(self) -> str: """Generate expected copyright header.""" year_range = f"{self.start_year}-{self.current_year}" if self.start_year != self.current_year else str(self.current_year) return f"Copyright (c) {year_range} Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved." - + def _extract_copyright_from_content(self, content: str) -> str: """Extract copyright line from file content.""" lines = content.split('\n') @@ -181,7 +181,7 @@ def _extract_copyright_from_content(self, content: str) -> str: if cleaned_line.lower().startswith('copyright'): return cleaned_line return "" - + def _validate_copyright_format(self, copyright_line: str) -> bool: """Validate copyright line. Accepts any header of the form: @@ -211,7 +211,7 @@ def _validate_copyright_format(self, copyright_line: str) -> bool: return False # All conditions satisfied return True - + def validate_file(self, file_path: str) -> Dict[str, Any]: """Validate copyright in a single file.""" result = { @@ -222,55 +222,55 @@ def validate_file(self, file_path: str) -> Dict[str, Any]: 'found_copyright': '', 'expected_copyright': self._get_expected_copyright() } - + # Check if file is excluded if self._is_excluded(file_path): result['excluded'] = True result['valid'] = True # Excluded files are considered valid return result - + try: # Check if file exists if not os.path.exists(file_path): result['error'] = f"File not found: {file_path}" return result - + # Read file content with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() - + # Extract copyright line copyright_line = self._extract_copyright_from_content(content) result['found_copyright'] = copyright_line - + if not copyright_line: result['error'] = "No copyright header found" return result - + # Validate copyright format result['valid'] = self._validate_copyright_format(copyright_line) - + if not result['valid']: result['error'] = "Copyright format does not match expected format" - + except Exception as e: result['error'] = f"Error reading file: {str(e)}" - + return result - + def validate_files(self, file_paths: List[str], relative_paths: List[str] = None) -> List[Dict[str, Any]]: """Validate copyright in multiple files. - + Args: file_paths: Absolute paths to files for file operations relative_paths: Relative paths for exclusion checking (optional) """ results = [] - + # If no relative paths provided, use file_paths as-is if relative_paths is None: relative_paths = file_paths - + for file_path, relative_path in zip(file_paths, relative_paths): # Use relative path for exclusion checking if self._is_excluded(relative_path): @@ -283,14 +283,14 @@ def validate_files(self, file_paths: List[str], relative_paths: List[str] = None 'found_copyright': None }) continue - + # Use absolute path for file operations result = self.validate_file(file_path) result['relative_path'] = relative_path results.append(result) - + return results - + def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): """Print validation results.""" MARKER_START = "<<>>" @@ -312,7 +312,7 @@ def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): else: counts_line += f" | at: {ts}" print(counts_line) - print() + print() has_invalid = invalid_files > 0 if has_invalid: @@ -366,7 +366,7 @@ def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): print("2. Ensure the year range matches the configuration (start year through current year).") print("3. Do not alter spacing or punctuation in the header line.") print("4. Commit and push the changes to update this check.") - print() + print() if not has_invalid: print("✅ All files have valid copyright headers!\n") @@ -386,47 +386,47 @@ def main(): echo "file1.py\nfile2.js" | python copyrightcheck.py -c config.yml --files-from-stdin """ ) - + parser.add_argument( '-c', '--config', required=True, help='Path to copyright configuration file' ) - + parser.add_argument( '-w', '--working-dir', help='Working directory for resolving relative file paths (default: current directory)' ) - + parser.add_argument( 'files', nargs='*', help='Files to check for copyright headers (relative to working-dir if specified)' ) - + parser.add_argument( '--files-from-stdin', action='store_true', help='Read file paths from standard input (one per line)' ) - + parser.add_argument( '-v', '--verbose', action='store_true', help='Show detailed output including valid and excluded files' ) - + parser.add_argument( '--origins-file', help='Optional file containing origin metadata for each file (ignored by validator)', required=False ) - + args = parser.parse_args() - + # Get file paths file_paths = [] - + if args.files_from_stdin: # Read file paths from stdin for line in sys.stdin: @@ -435,24 +435,24 @@ def main(): file_paths.append(file_path) else: file_paths = args.files - + if not file_paths: print("Error: No files specified. Use positional arguments or --files-from-stdin.") sys.exit(1) - + # Initialize validator validator = CopyrightValidator(args.config) - + # Set working directory if specified working_dir = args.working_dir or os.getcwd() if args.working_dir: print(f"📂 Working directory: {working_dir}") - + # Convert file paths to absolute paths for file operations # but keep relative paths for exclusion checking absolute_file_paths = [] relative_file_paths = [] - + for file_path in file_paths: if os.path.isabs(file_path): # Already absolute - convert to relative for exclusion checking @@ -469,13 +469,13 @@ def main(): absolute_path = os.path.join(working_dir, file_path) absolute_file_paths.append(absolute_path) relative_file_paths.append(file_path) - + # Validate files using absolute paths for file ops, relative for exclusion results = validator.validate_files(absolute_file_paths, relative_file_paths) - + # Print results validator.print_results(results, verbose=args.verbose) - + # Exit with error code if any files are invalid invalid_count = sum(1 for r in results if not r['valid'] and not r['excluded']) if invalid_count > 0: