Skip to content
This repository was archived by the owner on Jan 23, 2024. It is now read-only.

Commit 0e4b36d

Browse files
committed
Handle symlinks in module search by deduplicating at the right place.
1. What do we need to dedup? Only directories, not files. Example 1: Need to deduplicate directories. Assume: sys.path = ['a/b', 'd'] Where: a/b/c.py d -> a/b (symlink) Then, a breakpoint at 'c.py:12' must have only 1 match: a/b/c.py Example 2: Assume: sys.path = ['.', 'a/b'] Where: a/b/c.py d.py -> a/b/c.py Then, a breakpoint at 'c.py:12' must have only 1 match: a/b/c.py And, a breakpoint at 'd.py:23' must have only 1 match: d.py (Python compiler/interpreter treats d.py as a separate module) 2. Where do we dedup? We can dedup in two places: (a) When gathering all directories we explore. (b) When adding a path to 'candidates' set. Solution (a) requires remembering all directories we have already explored. Solution (b) is simpler, but is computationally more expensive as we search same directories multiple times. This CL uses solution (b). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=167213003
1 parent 2951223 commit 0e4b36d

File tree

4 files changed

+73
-27
lines changed

4 files changed

+73
-27
lines changed

src/googleclouddebugger/imphook.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import sys # Must be imported, otherwise import hooks don't work.
1919
import threading
2020

21+
import module_utils
22+
2123
# Callbacks to invoke when a module is imported.
2224
_import_callbacks = {}
2325
_import_callbacks_lock = threading.Lock()
@@ -189,12 +191,13 @@ def _InvokeImportCallback(module):
189191
if not module:
190192
return
191193

192-
path = getattr(module, '__file__', None)
193-
if not path:
194+
mod_path = getattr(module, '__file__', None)
195+
if not mod_path:
194196
return
195197

196-
path, unused_ext = os.path.splitext(os.path.abspath(path))
197-
callbacks = _import_callbacks.get(path)
198+
mod_abspath = module_utils.GetAbsolutePath(mod_path)
199+
mod_abspath, unused_ext = os.path.splitext(mod_abspath)
200+
callbacks = _import_callbacks.get(mod_abspath)
198201
if not callbacks:
199202
return # Common code path.
200203

src/googleclouddebugger/module_search.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import time
2121

2222
import cdbg_native as native
23+
import module_utils
2324

2425

2526
def _CommonPathSuffixLen(paths):
@@ -43,13 +44,15 @@ def FindMatchingFiles(location_path):
4344
"""Returns a list of absolute filenames of best matching modules/packages."""
4445

4546
def AddCandidate(mod_path):
46-
suffix_len = _CommonPathSuffixLen([src_path, mod_path])
47+
# We must sanitize the module path before using it for proper deduplication.
48+
mod_abspath = module_utils.GetAbsolutePath(mod_path)
49+
suffix_len = _CommonPathSuffixLen([src_path, mod_abspath])
4750
if suffix_len < longest_suffix_len[0]:
4851
return
4952
if suffix_len > longest_suffix_len[0]:
5053
candidates.clear()
5154
longest_suffix_len[0] = suffix_len
52-
candidates.add(mod_path)
55+
candidates.add(mod_abspath)
5356

5457
# We measure the time it takes to execute the scan.
5558
start_time = time.time()
@@ -63,54 +66,54 @@ def AddCandidate(mod_path):
6366

6467
# Using mutable vars to make them available in nested functions.
6568

66-
# The set of module/package path w/ no extension. Contains absolute paths.
69+
# The set of module/package path w/ no extension. Use AddCandidate() to insert
70+
# into this set.
6771
candidates = set()
6872

6973
# Init longest_suffix_len to 1 to avoid inserting zero length suffixes.
7074
longest_suffix_len = [1]
7175

7276
# Search paths for modules and packages, init with system search paths.
73-
search_abspaths = set(os.path.abspath(path) for path in sys.path)
77+
search_paths = set(path for path in sys.path)
7478

7579
# Add search paths from the already loaded packages and add matching modules
7680
# or packages to the candidates list.
7781
for module in sys.modules.values():
7882
# Extend the search paths with packages path and modules file directory.
79-
# Note that __path__ only exist for packages, but does not have to be
80-
# absolute path as the user can overwrite it.
81-
search_abspaths |= frozenset(
82-
os.path.abspath(path) for path in getattr(module, '__path__', []))
83+
# Note that __path__ only exist for packages.
84+
search_paths |= frozenset(getattr(module, '__path__', []))
8385
mod_path = os.path.splitext(getattr(module, '__file__', ''))[0]
86+
8487
if not mod_path:
8588
continue
86-
mod_abspath = os.path.abspath(mod_path)
87-
search_abspaths.add(os.path.dirname(mod_abspath))
89+
90+
search_paths.add(os.path.dirname(mod_path))
8891
# Add loaded modules to the candidates set.
89-
if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_abspath):
90-
AddCandidate(mod_abspath)
92+
if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_path):
93+
AddCandidate(mod_path)
9194

9295
# Walk the aggregated search path and loook for modules or packages.
9396
# By searching one path at the time we control the module file name
9497
# without having to load it.
9598
# TODO(erezh): consider using the alternative impl in cr/165133821 which
9699
# only uses os file lookup and not using pkgutil. The alternative is faster
97100
# but is making many more assuptions that this impl does not.
98-
while search_abspaths:
101+
while search_paths:
99102
num_dirs_scanned += 1
100-
abspath = search_abspaths.pop()
101-
for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([abspath]):
102-
mod_abspath = os.path.join(abspath, mod_name)
103+
path = search_paths.pop()
104+
for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([path]):
105+
mod_path = os.path.join(path, mod_name)
103106
if mod_ispkg:
104-
search_abspaths.add(mod_abspath)
105-
mod_abspath = os.path.join(mod_abspath, '__init__')
107+
search_paths.add(mod_path)
108+
mod_path = os.path.join(mod_path, '__init__')
106109
if src_ispkg == mod_ispkg and src_name == mod_name:
107-
AddCandidate(mod_abspath)
110+
AddCandidate(mod_path)
108111

109112
# Sort the list to return a stable result to the user.
110113
# TODO(erezh): No need to add the .py extenssion, this is done just for
111114
# compatabilty with current code. Once refactored not to use file extension
112115
# this code can be removed to just return the sorted candidates.
113-
candidates = sorted(abspath + '.py' for abspath in candidates)
116+
candidates = sorted(path + '.py' for path in candidates)
114117

115118
# Log scan stats, without the files list to avoid very long output as well as
116119
# the potential leak of system files that the user has no access to.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 2015 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS-IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Provides utility functions for module path processing.
16+
"""
17+
18+
19+
import os
20+
21+
22+
def GetAbsolutePath(mod_path):
23+
"""Flattens symlinks and indirections in the module path.
24+
25+
To uniquely identify each module file, the file path must be sanitized
26+
by following all symbolic links and normalizing to an absolute path.
27+
28+
Note that the module file (i.e., .py/.pyc/.pyo file) itself can be a
29+
symbolic link, but we must *NOT* follow that symbolic link.
30+
31+
Args:
32+
mod_path: A path that represents a module file.
33+
34+
Returns:
35+
The sanitized version of mod_path.
36+
"""
37+
pkg_path, file_name = os.path.split(mod_path)
38+
pkg_path = os.path.abspath(os.path.realpath(pkg_path))
39+
return os.path.join(pkg_path, file_name)
40+

src/googleclouddebugger/python_breakpoint.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import imphook
2626
import module_explorer
2727
import module_search
28+
import module_utils
2829

2930
# TODO(vlif): move to messages.py module.
3031
# Use the following schema to define breakpoint error message constant:
@@ -95,9 +96,8 @@ def _GetLoadedModuleByPath(abspath):
9596
if not path:
9697
continue # This is a built-in module.
9798

98-
# module.__file__ may be relative to the current directory, so we first
99-
# convert it into absolute path.
100-
module_abspath = os.path.abspath(path)
99+
# module.__file__ may be relative or may contain symlinks inside it.
100+
module_abspath = module_utils.GetAbsolutePath(path)
101101

102102
# Ignore file extension while comparing the file paths (e.g., /foo/bar.py vs
103103
# /foo/bar.pyc should match).

0 commit comments

Comments
 (0)