class Solution:
    """LeetCode 127 "Word Ladder", first attempt: explicit word graph plus BFS."""

    class Node:
        """Graph vertex for a single word.

        Attributes:
            val: Index of the word in the de-duplicated word sequence.
            next: Nodes whose words differ from this one in exactly one position.
        """

        def __init__(self, val):
            self.val = val
            self.next = []

    def ladderLength(self, beginWord: str, endWord: str, wordList: List[str]) -> int:
        """Return the number of words on the shortest ladder, both ends included.

        Args:
            beginWord: Word the ladder starts from.
            endWord: Word the ladder must end on; it has to appear in wordList.
            wordList: Words that may be used as steps. It is not modified.

        Returns:
            The count of words on a shortest ladder from beginWord to endWord
            (so a direct neighbour gives 2), or 0 when no ladder exists.
        """
        if endWord not in wordList:
            return 0

        # Work on a private, de-duplicated copy: appending beginWord to the
        # caller's list would leak out of this call, and duplicate words would
        # only add self-loops and repeated edges to the graph.
        words = list(dict.fromkeys([*wordList, beginWord]))
        word_to_node = {word: self.Node(i) for i, word in enumerate(words)}

        def differ_in_one_position(word1, word2):
            # Words of different length are never adjacent.
            if len(word1) != len(word2):
                return False
            mismatches = 0
            for c1, c2 in zip(word1, word2):
                if c1 != c2:
                    mismatches += 1
                    if mismatches > 1:
                        return False
            return mismatches == 1

        # Connect every pair of adjacent words (undirected edges).
        for i in range(len(words)):
            for j in range(i + 1, len(words)):
                if differ_in_one_position(words[i], words[j]):
                    word_to_node[words[i]].next.append(word_to_node[words[j]])
                    word_to_node[words[j]].next.append(word_to_node[words[i]])

        start_node = word_to_node[beginWord]
        end_node = word_to_node[endWord]
        # Maps a node to the number of words on the shortest ladder reaching it;
        # membership doubles as the "already seen" marker.
        node_to_word_count = {start_node: 1}
        nodes = deque([start_node])
        while nodes:
            node = nodes.popleft()
            if node is end_node:
                # BFS dequeues nodes in order of distance, so stop right here.
                return node_to_word_count[node]
            for next_node in node.next:
                if next_node in node_to_word_count:
                    continue
                node_to_word_count[next_node] = node_to_word_count[node] + 1
                nodes.append(next_node)
        return 0
どのタイミングでseenを入れるか、nextで重なるのは処理が重いかも(上はたまたまnextで重ならないようになっているが、意識していなかった +- キャッシュと遅延評価に関する議論 + +https://github.com/hayashi-ay/leetcode/pull/42 + +- まずは、遅延評価で実装。 + - 繋がっていない例外のreturn 0は前に出したかったので、while Trueを利用 +- fhiyoさんの実装を参考にgeneratorで実装 + +```python + +class Solution: + def ladderLength(self, beginWord: str, endWord: str, wordList: List[str]) -> int: + def is_adjacent(word1, word2): + distance = 0 + for c1, c2 in zip(word1, word2, strict=True): + if c1 != c2: + distance += 1 + if distance > 1: + return False + return distance == 1 + + def generate_adjacent_words(word, candidates): + for candidate_word in candidates: + if candidate_word in seen: + continue + if is_adjacent(word, candidate_word): + seen.add(candidate_word) + yield candidate_word + + num_words_so_far = {} + num_words_so_far[beginWord] = 1 + connecting_words = deque([beginWord]) + seen = set() + # while connecting_wordsと迷ったが、return 0が前にある方がわかりやすいかもと思った + while True: + if not connecting_words: + return 0 + word = connecting_words.popleft() + if word == endWord: + return num_words_so_far[endWord] + for next_word in generate_adjacent_words(word, wordList): + num_words_so_far[next_word] = num_words_so_far[word] + 1 + connecting_words.append(next_word) +``` + +- 遅延評価に、さらにキャッシュを利用でやってみた + - キャッシュを利用して半分にする[この辺の](https://discord.com/channels/1084280443945353267/1200089668901937312/1215904902102913114)議論、いまいち理解できたか自信ないけどこういう実装のことかな + - word1→word2とたどる場合と、word2→ word1の場合があるので、それを同じと捉えればハミング距離の計算が半分になる + - BFSでの最短距離は、今度は配列を使わずnextのdequeを用意 + - MLEになったりTLEになったり + +```python + +class Solution: + @cache + def is_adjacent(self, word1, word2): + distance = 0 + for c1, c2 in zip(word1, word2, strict=True): + if c1 != c2: + distance += 1 + if distance > 1: + return False + return distance == 1 + + def ladderLength(self, beginWord: str, endWord: str, wordList: List[str]) -> int: + def generate_adjacent_words(word, candidates): + for candidate_word in candidates: + if candidate_word in seen: + continue + word1 = word + 
class Solution:
    """LeetCode 127 "Word Ladder": adjacency list plus Dijkstra's algorithm."""

    def ladderLength(self, beginWord: str, endWord: str, wordList: List[str]) -> int:
        """Return the number of words on the shortest ladder, both ends included.

        Neighbours are found by replacing each character of a word with every
        other lowercase ASCII letter and keeping the results found in wordList.

        Args:
            beginWord: Word the ladder starts from.
            endWord: Word the ladder must end on.
            wordList: Words that may be used as steps. It is not modified.

        Returns:
            The count of words on a shortest ladder, or 0 when endWord cannot
            be reached.
        """
        # Set membership is O(1); testing against the list itself made every
        # one of the 26 * len(word) candidate probes a linear scan.
        known_words = set(wordList)

        def generate_adjacent_words(word):
            for i, current_char in enumerate(word):
                for c in ascii_lowercase:
                    if c == current_char:
                        continue
                    one_diff_word = word[:i] + c + word[i + 1:]
                    if one_diff_word in known_words:
                        yield one_diff_word

        # Adjacency list of (edge_weight, neighbour); every edge weighs 1.
        adjacent = defaultdict(list)
        for word in known_words | {beginWord}:
            for adjacent_word in generate_adjacent_words(word):
                adjacent[word].append((1, adjacent_word))

        # Min-heap of (words_on_ladder_so_far, word).
        word_and_num_so_far = [(1, beginWord)]
        # The start word is settled from the outset and must not be pushed again.
        visited = {beginWord}
        while True:
            if not word_and_num_so_far:
                return 0
            num_so_far, word = heapq.heappop(word_and_num_so_far)
            if word == endWord:
                return num_so_far
            for distance, adjacent_word in adjacent[word]:
                if adjacent_word in visited:
                    continue
                # Marking at push time is valid only because all weights are
                # equal: the first path that reaches a word is a shortest one.
                visited.add(adjacent_word)
                heapq.heappush(word_and_num_so_far, (num_so_far + distance, adjacent_word))
+ +```python + +class Solution: + def collect_half_same_words(self, wordList): + left_half_to_right = defaultdict(list) + right_half_to_left = defaultdict(list) + for word in wordList: + left_half = word[:len(word)//2] + right_half = word[len(word)//2:] + left_half_to_right[left_half].append(right_half) + right_half_to_left[right_half].append(left_half) + return left_half_to_right, right_half_to_left + + def is_word_diff_one_char(self, word1, word2): + diff_count = 0 + for c1, c2 in zip(word1, word2, strict=True): + if c1 == c2: + continue + diff_count += 1 + if diff_count > 1: + return False + return diff_count == 1 + + def ladderLength(self, beginWord: str, endWord: str, wordList: List[str]) -> int: + def generate_adjacent_words(word): + left_half = word[:len(word)//2] + right_half = word[len(word)//2:] + for candidate in left_half_to_right[left_half]: + if not self.is_word_diff_one_char(right_half, candidate): + continue + yield left_half + candidate + for candidate in right_half_to_left[right_half]: + if not self.is_word_diff_one_char(left_half, candidate): + continue + yield candidate + right_half + + checked = {} + left_half_to_right, right_half_to_left = self.collect_half_same_words(wordList) + words = [beginWord] + transfered_words_count_so_far = 0 + checked = set() + while True: + if not words: + return 0 + transfered_words_count_so_far += 1 + next_words = [] + for word in words: + if word == endWord: + return transfered_words_count_so_far + for next_word in generate_adjacent_words(word): + if next_word in checked: + continue + next_words.append(next_word) + checked.add(next_word) + words = next_words +``` + +- lru_cacheの自前実装 + - [CPythonの該当部分](https://github.com/python/cpython/blob/3.12/Lib/functools.py#L477)を読んでみた + - 以下のはCPythonに従った。なぜCPythonがそういう実装なのかはよくわからない。 + - PREV, NEXTなどへの参照を配列で実装していた + - 最大の保存長を超えたとき、なぜかrootに新しい要素を入れて、root.nextを新たなrootにしていた + - 
def lru_cache(max_count=128):
    """Return a decorator that memoizes a function with an LRU policy.

    Args:
        max_count: Maximum number of results kept. Zero or a negative value
            disables caching.

    Returns:
        A decorator. Applying it calls lru_cache_wrapper once, at decoration
        time, and binds the decorated name to the wrapper it returns; every
        later call of that name therefore runs only the wrapper.
    """
    def decorate_user_function(user_function):
        return lru_cache_wrapper(user_function, max_count)
    return decorate_user_function


def lru_cache_wrapper(user_function, max_count):
    """Build the caching wrapper around user_function.

    Args:
        user_function: Function to memoize. Only positional arguments are
            supported and they must be hashable (they form the cache key).
        max_count: Maximum number of results kept.

    Returns:
        A function with the same positional signature as user_function.
    """
    from functools import wraps

    cache = CachedDoublyLinkedList()

    @wraps(user_function)
    def user_function_wrapper(*args):
        link = cache.get_link(args)
        if link is not None:
            # Hit: this entry becomes the most recently used one.
            cache.move_to_last_link(args, link)
            return cache.get_result(link)
        result = user_function(*args)
        if max_count <= 0:
            # Nothing may be stored.
            return result
        if cache.len >= max_count:
            # Full: reuse the root slot and evict the least recently used entry.
            cache.insert_root(args, result)
        else:
            cache.insert_last(args, result)
        return result

    return user_function_wrapper


class CachedDoublyLinkedList:
    """Key-to-link dict plus a circular doubly linked list in usage order.

    Each link is a four-element list [PREV, NEXT, RESULT, KEY]. The root link
    is a sentinel holding no entry: root[NEXT] is the least recently used
    entry and root[PREV] the most recently used one.
    """

    # Field positions inside a link.
    PREV = 0
    NEXT = 1
    RESULT = 2
    KEY = 3

    def __init__(self):
        self._cache = {}  # key -> link
        self._root = []
        # An empty ring: the sentinel points at itself in both directions.
        self._root[:] = [self._root, self._root, None, None]
        self.len = 0  # number of stored entries (the sentinel is not counted)

    def get_link(self, args):
        """Return the link stored under args, or None when there is none."""
        return self._cache.get(args)

    def get_result(self, link):
        """Return the cached result held by link."""
        return link[self.RESULT]

    def insert_root(self, args, result):
        """Store a new entry and evict the least recently used one.

        The current sentinel is filled with the new entry, which places it at
        the most recently used end without allocating a link. The oldest link
        then becomes the new sentinel and its key is removed from the dict, so
        the number of stored entries does not change.
        """
        old_root = self._root
        old_root[self.KEY] = args
        old_root[self.RESULT] = result
        self._cache[args] = old_root
        self._root = old_root[self.NEXT]
        # Drop the evicted key. Leaving it mapped to None would let the dict
        # grow with every distinct key ever seen.
        del self._cache[self._root[self.KEY]]
        self._root[self.KEY] = None
        self._root[self.RESULT] = None

    def insert_last(self, args, result):
        """Append a new entry at the most recently used end."""
        old_last = self._root[self.PREV]
        last = [old_last, self._root, result, args]
        self._root[self.PREV] = last
        old_last[self.NEXT] = last
        self.len += 1
        self._cache[args] = last

    def move_to_last_link(self, args, link):
        """Move an existing link to the most recently used end."""
        # Unlink from the current position.
        link[self.NEXT][self.PREV] = link[self.PREV]
        link[self.PREV][self.NEXT] = link[self.NEXT]
        # Relink just before the sentinel.
        last = self._root[self.PREV]
        last[self.NEXT] = link
        self._root[self.PREV] = link
        link[self.NEXT] = self._root
        link[self.PREV] = last
        self._cache[args] = link
class ConstructAdjacentWords:
    """Index of words keyed by "one character removed" patterns.

    A word of length n is registered under n keys, one per position i, each
    being the pair (word[:i], word[i + 1:]). Two equal-length words share a
    key exactly when they agree everywhere except possibly at position i.
    """

    def __init__(self):
        self.adjacent_words = defaultdict(list)  # key -> words registered under it
        self._registered = set()  # words already added, to skip repeats

    def word_to_keys(self, word):
        """Yield the (prefix, suffix) key for every position of word."""
        for i in range(len(word)):
            yield (word[:i], word[i + 1:])

    def add(self, word):
        """Register word under all of its keys; repeated words are ignored."""
        if word in self._registered:
            return
        self._registered.add(word)
        for key in self.word_to_keys(word):
            self.adjacent_words[key].append(word)

    def generate_adjacent_words(self, word):
        """Yield registered words that differ from word in exactly one position.

        The word itself is not yielded even when it is registered.
        """
        for key in self.word_to_keys(word):
            # .get(..., ()) keeps lookups from creating empty buckets.
            for next_word in self.adjacent_words.get(key, ()):
                if next_word != word:
                    yield next_word


class Solution:
    """LeetCode 127 "Word Ladder": level-by-level BFS over the pattern index."""

    def ladderLength(self, beginWord: str, endWord: str, wordList: List[str]) -> int:
        """Return the number of words on the shortest ladder, both ends included.

        Args:
            beginWord: Word the ladder starts from.
            endWord: Word the ladder must end on.
            wordList: Words that may be used as steps.

        Returns:
            The count of words on a shortest ladder, or 0 when endWord cannot
            be reached.
        """
        adjacent = ConstructAdjacentWords()
        adjacent.add(beginWord)
        for word in wordList:
            adjacent.add(word)

        result = 1
        words = [beginWord]
        # The start word counts as seen so it is never queued again.
        seen = {beginWord}
        while words:
            next_words = []
            for word in words:
                if word == endWord:
                    return result
                for next_word in adjacent.generate_adjacent_words(word):
                    if next_word in seen:
                        continue
                    seen.add(next_word)
                    next_words.append(next_word)
            words = next_words
            result += 1
        return 0