diff --git a/projects/combinatory-chemistry/README.md b/projects/combinatory-chemistry/README.md index bf01098..a26442f 100644 --- a/projects/combinatory-chemistry/README.md +++ b/projects/combinatory-chemistry/README.md @@ -5,10 +5,10 @@ Towards a Simple Model of Emergent Evolution_. ## Requirements -The simulator requires the `lazy` library to run: +Install required libraries with: ```bash -pip install lazy +pip install -r requirements.txt ``` ## Running the simulation diff --git a/projects/combinatory-chemistry/main.py b/projects/combinatory-chemistry/main.py index 5b41b86..8b061ac 100644 --- a/projects/combinatory-chemistry/main.py +++ b/projects/combinatory-chemistry/main.py @@ -6,7 +6,6 @@ print('Loading...') import argparse -import plotille import os import time from collections import defaultdict, Counter diff --git a/projects/combinatory-chemistry/multiset.py b/projects/combinatory-chemistry/multiset.py index d61b649..c292621 100644 --- a/projects/combinatory-chemistry/multiset.py +++ b/projects/combinatory-chemistry/multiset.py @@ -5,19 +5,18 @@ # LICENSE file in the root directory of this source tree. from collections import Counter -from sampler import Sampler +import itertools +import bisect +import random class Multiset(object): def __init__(self, N): - self.item2id = {} - self.id2item = [] self.item_count = Counter() self.max_size = N - self.sampler = Sampler(N, N, 1) self.count = 0 def __contains__(self, item): - return item in self.item2id + return item in self.item_count def has_all(self, items): items = Counter(items) @@ -41,33 +40,21 @@ def items(self): return self.item_count.items() def unique(self): - return iter(self.id2item) + return self.item_count.keys() def __len__(self): return self.count def grow_capacity(self, n): - new_sampler = Sampler(self.max_size + n, self.max_size + n, 1) - for item, count in self.items(): - item_id = self.item2id[item] - new_sampler.add(item_id, count) - self.sampler = new_sampler self.max_size += n def add(self, item, item_count=1): assert self.count < self.max_size if not item in self: - item_id = len(self.id2item) - self.item2id[item] = item_id - self.id2item.append(item) self.item_count[item] = item_count - self.sampler.add(item_id, item_count) else: - item_id = self.item2id[item] c = self.item_count[item] self.item_count[item] += item_count - self.sampler.remove(item_id, c) - self.sampler.add(item_id, c + item_count) self.count += item_count def remove_all(self, item): @@ -81,32 +68,19 @@ def add_many(self, item, copies): def remove(self, item): assert item in self, item - item_id = self.item2id[item] c = self.item_count[item] - self.sampler.remove(item_id, c) if c == 1: del self.item_count[item] - del self.item2id[item] - # move another item to this id - last_item = self.id2item.pop() - last_item_id = len(self.id2item) - # unless this was the last item - if last_item_id != item_id: - self.id2item[item_id] = last_item - self.item2id[last_item] = item_id - last_item_count = self.item_count[last_item] - self.sampler.remove(last_item_id, last_item_count) - self.sampler.add(item_id, last_item_count) else: self.item_count[item] -= 1 - self.sampler.add(item_id, c - 1) self.count -= 1 def sample(self): - assert self.count == self.sampler.total_weight - item_id = self.sampler.sample() - return self.id2item[item_id] + choices, weights = zip(*self.item_count.items()) + cumdist = list(itertools.accumulate(weights)) + x = random.random() * cumdist[-1] + return choices[bisect.bisect(cumdist, x)] def sample_without_replacement(self, n): ret = [] diff --git a/projects/combinatory-chemistry/requirements.txt b/projects/combinatory-chemistry/requirements.txt new file mode 100644 index 0000000..c213297 --- /dev/null +++ b/projects/combinatory-chemistry/requirements.txt @@ -0,0 +1,3 @@ +lazy +networkx +cachetools diff --git a/projects/combinatory-chemistry/sampler.py b/projects/combinatory-chemistry/sampler.py deleted file mode 100644 index fc26eb5..0000000 --- a/projects/combinatory-chemistry/sampler.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - - -import numbers -from numpy import * - -class Sampler: - - def __init__(self, max_entries, max_value=100, min_value=1): - self.nentries = 0 - self.max_entries = max_entries - self.max_value = max_value - self.min_value = min_value - self.top_level = int(ceil(log2(max_value))) - self.bottom_level = int(ceil(log2(min_value))) - self.nlevels = 1 + self.top_level - self.bottom_level - - self.total_weight = 0 - self.weights = zeros(max_entries, dtype='d') - - - self.level_weights = zeros(self.nlevels, dtype='d') - self.level_buckets = [[] for i in range(self.nlevels)] - self.level_max = [pow(2, self.top_level-i) for i in range(self.nlevels)] - - def add(self, idx, weight): - if weight > self.max_value or weight < self.min_value: - raise Exception("Weight out of range: %1.2e" % weight) - - if idx < 0 or idx >= self.max_entries or not isinstance(idx, numbers.Integral): - raise Exception("Bad index: %s", idx) - - self.nentries += 1 - self.total_weight += weight - - self.weights[idx] = weight - - raw_level = int(ceil(log2(weight))) - level = self.top_level - raw_level - - self.level_weights[level] += weight - self.level_buckets[level].append(idx) - - def remove(self, idx, weight): - if weight > self.max_value or weight < self.min_value: - raise Exception("Weight out of range: %1.2e" % weight) - - if idx < 0 or idx >= self.max_entries or not isinstance(idx, numbers.Integral): - raise Exception("Bad index: %s", idx) - - raw_level = int(ceil(log2(weight))) - level = self.top_level - raw_level - - for idx_in_level in range(len(self.level_buckets[level])): - if self.level_buckets[level][idx_in_level] == idx: - break - else: - raise Exception("Index not found: ", idx) - - self.weights[idx] = 0.0 - self.total_weight -= weight - self.level_weights[level] -= weight - # Swap with last element for efficent delete - swap_idx = self.level_buckets[level].pop() - if idx != swap_idx: - self.level_buckets[level][idx_in_level] = swap_idx - self.nentries -= 1 - - def _sample(self): - - u = random.uniform(high=self.total_weight) - - # Sample a level using the CDF method - cumulative_weight = 0 - for i in range(self.nlevels): - cumulative_weight += self.level_weights[i] - level = i - if u < cumulative_weight: - break - - # Now sample within the level using rejection sampling - level_size = len(self.level_buckets[level]) - level_max = self.level_max[level] - reject = True - while reject: - idx_in_level = random.randint(0, level_size) - idx = self.level_buckets[level][idx_in_level] - idx_weight = self.weights[idx] - u_lvl = random.uniform(high=level_max) - if u_lvl <= idx_weight: - reject = False - - return (idx, level, idx_in_level, idx_weight) - - def sample(self): - return self._sample()[0] - - def sampleAndRemove(self): - (idx, level, idx_in_level, weight) = self._sample() - - # Remove it - self.weights[idx] = 0.0 - self.total_weight -= weight - self.level_weights[level] -= weight - # Swap with last element for efficent delete - swap_idx = self.level_buckets[level].pop() - self.level_buckets[level][idx_in_level] = swap_idx - self.nentries -= 1 - - return (idx, weight)