diff --git a/diskindexstore.go b/diskindexstore.go deleted file mode 100644 index 133c473..0000000 --- a/diskindexstore.go +++ /dev/null @@ -1,157 +0,0 @@ -package simian - -import ( - "crypto/sha256" - "encoding/hex" - "fmt" - "os" - "path" - - "github.com/mandykoh/keva" -) - -const nodeFingerprintFile = "fingerprint" -const nodeEntriesDir = "entries" -const thumbnailsDir = "thumbnails" - -type DiskIndexStore struct { - rootPath string - nodes *keva.Store -} - -func (s *DiskIndexStore) AddEntry(entry *IndexEntry, node *IndexNode, nodeFingerprint Fingerprint) error { - err := entry.saveThumbnail(s.pathForThumbnail(entry)) - if err != nil { - return err - } - - node.registerEntry(entry) - - fmt.Printf("AddEntry - Saving [%s] %d %d\n", nodeFingerprint.String(), len(node.childFingerprints), len(node.entries)) - return s.nodes.Put(nodeFingerprint.String(), node) -} - -func (s *DiskIndexStore) Close() error { - return s.nodes.Close() -} - -func (s *DiskIndexStore) GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) { - var node IndexNode - - err := s.nodes.Get(f.String(), &node) - if err == keva.ErrValueNotFound { - return nil, nil - - } else if err == nil { - err = s.loadThumbnails(&node) - if err != nil { - return nil, err - } - - } else { - return nil, err - } - - return &node, nil -} - -func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode, parentFingerprint Fingerprint) (*IndexNode, error) { - fmt.Printf("GetOrCreateChild() %s\n", f.String()) - - nodeKey := f.String() - - var node IndexNode - err := s.nodes.Get(nodeKey, &node) - - if err == keva.ErrValueNotFound { - fmt.Printf("Creating child\n") - - node = IndexNode{ - childFingerprintsByString: make(map[string]*Fingerprint), - } - - fmt.Printf("GetOrCreateChild - Saving [%s] %d %d\n", nodeKey, len(node.childFingerprints), len(node.entries)) - err = s.nodes.Put(nodeKey, &node) - if err != nil { - return nil, err - } - - parent.registerChild(f) - fmt.Printf("GetOrCreateChild - Parent - Saving [%s] %d %d\n", parentFingerprint.String(), len(parent.childFingerprints), len(parent.entries)) - err = s.nodes.Put(parentFingerprint.String(), parent) - if err != nil { - return nil, err - } - - } else if err == nil { - err = s.loadThumbnails(&node) - if err != nil { - return nil, err - } - - } else { - return nil, err - } - - return &node, nil -} - -func (s *DiskIndexStore) GetRoot() (*IndexNode, error) { - var rootKey = Fingerprint{}.String() - - var root IndexNode - err := s.nodes.Get(rootKey, &root) - - if err == keva.ErrValueNotFound { - fmt.Printf("Root node not found - creating it\n") - root = IndexNode{ - childFingerprintsByString: make(map[string]*Fingerprint), - } - - } else if err == nil { - fmt.Printf("Found root node with %d children and %d entries\n", len(root.childFingerprints), len(root.entries)) - - err = s.loadThumbnails(&root) - if err != nil { - return nil, err - } - - } else { - return nil, err - } - - return &root, nil -} - -func (s *DiskIndexStore) RemoveEntries(node *IndexNode, nodeFingerprint Fingerprint) error { - node.removeEntries() - fmt.Printf("RemoveEntries - Saving [%s] %d %d\n", nodeFingerprint.String(), len(node.childFingerprints), len(node.entries)) - return s.nodes.Put(nodeFingerprint.String(), node) -} - -func (s *DiskIndexStore) loadThumbnails(n *IndexNode) error { - return n.withEachEntry(func(entry *IndexEntry) error { - return entry.loadThumbnail(s.pathForThumbnail(entry)) - }) -} - -func (s *DiskIndexStore) pathForThumbnail(entry *IndexEntry) string { - thumbnailHash := sha256.Sum256(entry.MaxFingerprint.Bytes()) - thumbnailHex := hex.EncodeToString(thumbnailHash[:]) - return path.Join(s.rootPath, thumbnailsDir, thumbnailHex[0:2], thumbnailHex[2:4], thumbnailHex[4:]) -} - -func NewDiskIndexStore(rootPath string) (*DiskIndexStore, error) { - thumbnailsDir := path.Join(rootPath, thumbnailsDir) - os.MkdirAll(thumbnailsDir, os.FileMode(0700)) - - nodeStore, err := keva.NewStore(path.Join(rootPath, "nodes")) - if err != nil { - return nil, err - } - - return &DiskIndexStore{ - rootPath: rootPath, - nodes: nodeStore, - }, nil -} diff --git a/fingerprint.go b/fingerprint.go index 6e9f08e..43cfe2b 100644 --- a/fingerprint.go +++ b/fingerprint.go @@ -1,8 +1,7 @@ package simian import ( - "bytes" - "encoding/hex" + "fmt" "image" "image/color" "math" @@ -10,118 +9,113 @@ import ( "golang.org/x/image/draw" ) -const bitsPerSample = 4 -const sampleBitsMask = (1 << bitsPerSample) - 1 -const samplesPerByte = 8 / bitsPerSample +const fingerprintDCTSideLength = 8 +const fingerprintACShift = 7 +const fingerprintDifferenceScale = 22 -type Fingerprint struct { - samples []uint8 -} - -func (f *Fingerprint) Bytes() []byte { - packed := bytes.Buffer{} - current := byte(0) - bits := uint(8) - i := 0 - - for ; i < len(f.samples); i++ { - y := f.samples[i] +const SamplesPerFingerprint = fingerprintDCTSideLength * fingerprintDCTSideLength - bits -= bitsPerSample - current = (current << bitsPerSample) | (y >> (8 - bitsPerSample)) - - if bits == 0 { - packed.WriteByte(current) - current = 0 - bits = 8 - } - } +type Fingerprint [SamplesPerFingerprint]int16 - if bits < 8 { - current <<= bits - packed.WriteByte(current) +func (f *Fingerprint) Difference(other *Fingerprint) float64 { + result := 0.0 + for i := 0; i < SamplesPerFingerprint; i++ { + result += math.Abs(float64(f[i] - other[i])) } - return packed.Bytes() + return result / float64(SamplesPerFingerprint*fingerprintDifferenceScale) } -func (f *Fingerprint) Difference(to Fingerprint) (diff float64) { - return math.Min(float64(f.Distance(to))/float64(len(to.samples)*255), 1.0) +func (f *Fingerprint) Prefix(level int) []int16 { + return f[:level*level] } -func (f *Fingerprint) Distance(to Fingerprint) (dist uint64) { - if len(f.samples) != len(to.samples) { - return math.MaxUint64 - } +func NewFingerprintFromImage(src image.Image) *Fingerprint { + scaled := image.NewNRGBA(image.Rectangle{Max: image.Point{X: fingerprintDCTSideLength, Y: fingerprintDCTSideLength}}) + draw.BiLinear.Scale(scaled, scaled.Bounds(), src, src.Bounds(), draw.Src, nil) - for i := 0; i < len(f.samples); i++ { - if f.samples[i] > to.samples[i] { - dist += uint64(f.samples[i] - to.samples[i]) - } else { - dist += uint64(to.samples[i] - f.samples[i]) + samples := make([]int8, SamplesPerFingerprint) + offset := 0 + + // Sample from RGBA pixel values + for i := scaled.Bounds().Min.Y; i < scaled.Bounds().Max.Y; i++ { + for j := scaled.Bounds().Min.X; j < scaled.Bounds().Max.X; j++ { + r, g, b, _ := scaled.At(j, i).RGBA() + y, _, _ := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8)) + + val := int8(y - 128) + samples[offset] = val + offset++ } } - return dist -} + dct := DCT(fingerprintDCTSideLength, fingerprintDCTSideLength, samples) -func (f *Fingerprint) MarshalText() (text []byte, err error) { - bytes := f.Bytes() - result := make([]byte, hex.EncodedLen(len(bytes))) - - hex.Encode(result, bytes) - return result, nil -} + min := int16(math.MaxInt16) + max := int16(math.MinInt16) -func (f *Fingerprint) Size() int { - return int(math.Sqrt(float64(len(f.samples)))) -} + // Find the dynamic range for DC coefficients + for i := 1; i < len(dct); i++ { + if dct[i] < min { + min = dct[i] + } + if dct[i] > max { + max = dct[i] + } + } -func (f Fingerprint) String() string { - return hex.EncodeToString(f.Bytes()) -} + scale := 127.0 / float64(max-min) / 2.0 -func (f *Fingerprint) UnmarshalBytes(fingerprintBytes []byte) error { - sampleCount := int(math.Sqrt(float64(len(fingerprintBytes) * samplesPerByte))) - sampleCount *= sampleCount - f.samples = make([]uint8, sampleCount) + fmt.Printf("DCT:\n") - for i := 0; i < sampleCount; i++ { - b := fingerprintBytes[i/samplesPerByte] - shift := uint(8 - bitsPerSample - (i%samplesPerByte)*bitsPerSample) - bits := b >> shift & sampleBitsMask - f.samples[i] = bits << (8 - bitsPerSample) - } + // Scale AC coefficient down by fixed amount + dct[0] >>= fingerprintACShift - return nil -} + // Scale DC coefficients down according to dynamic range + for i := 0; i < len(dct); i++ { + if i != 0 { + dct[i] = int16(float64(dct[i]) * scale) + } -func (f *Fingerprint) UnmarshalText(text []byte) error { - hexBytes := make([]byte, hex.DecodedLen(len(text))) - _, err := hex.Decode(hexBytes, text) - if err != nil { - return err + if i > 0 && i%fingerprintDCTSideLength == 0 { + fmt.Println() + } + fmt.Printf(" %5d", dct[i]) } + fmt.Println() + fmt.Println() - return f.UnmarshalBytes(hexBytes) + return dctToFingerprint(dct) } -func NewFingerprint(src image.Image, size int) Fingerprint { - scaled := image.NewNRGBA(image.Rectangle{Max: image.Point{X: size, Y: size}}) - draw.BiLinear.Scale(scaled, scaled.Bounds(), src, src.Bounds(), draw.Src, nil) +func dctToFingerprint(squareMatrix []int16) (f *Fingerprint) { + f = &Fingerprint{} - fingerprintSamples := make([]uint8, size*size) + level := 0 offset := 0 - for i := scaled.Bounds().Min.Y; i < scaled.Bounds().Max.Y; i++ { - for j := scaled.Bounds().Min.X; j < scaled.Bounds().Max.X; j++ { - r, g, b, _ := scaled.At(j, i).RGBA() - y, _, _ := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8)) + for i := 0; i != SamplesPerFingerprint; { + if offset == level { + + // Sample the last corner of the current square + f[i] = squareMatrix[level*fingerprintDCTSideLength+level] + i++ + + // Start the next larger square + offset = 0 + level++ + + } else { + + // Sample one from the right and one from the bottom + f[i] = squareMatrix[offset*fingerprintDCTSideLength+level] + i++ + f[i] = squareMatrix[level*fingerprintDCTSideLength+offset] + i++ - fingerprintSamples[offset] = y & (sampleBitsMask << (8 - bitsPerSample)) offset++ } } - return Fingerprint{samples: fingerprintSamples} + return } diff --git a/fingerprint_test.go b/fingerprint_test.go index 6ff7211..697c7e8 100644 --- a/fingerprint_test.go +++ b/fingerprint_test.go @@ -1,213 +1,156 @@ package simian import ( - "encoding/hex" - "fmt" "image" "image/color" - "math" + "math/rand" "testing" ) func TestFingerprint(t *testing.T) { - testImage := func() image.Image { + randomImage := func() image.Image { img := image.NewNRGBA(image.Rectangle{Max: image.Point{X: 256, Y: 256}}) - for i := img.Bounds().Min.Y; i < img.Bounds().Max.Y; i++ { for j := img.Bounds().Min.X; j < img.Bounds().Max.X; j++ { - img.Set(j, i, color.RGBA{uint8(i), uint8(j), uint8(i), 255}) + img.Set(j, i, color.RGBA{uint8(rand.Int()), uint8(rand.Int()), uint8(rand.Int()), 255}) } } return img } - t.Run("Bytes() serialises to packed bytes", func(t *testing.T) { - f := Fingerprint{samples: []byte{0x00, 0x00, 0xF0, 0xF0}} - - actualString := fmt.Sprintf("%x", f.Bytes()) - - if actualString != "00ff" { - t.Errorf("Fingerprint '%s' doesn't match expected", actualString) - } - }) - - t.Run("Difference() returns zero for same fingerprint", func(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - f2 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - - diff := f1.Difference(f2) - - if diff != 0.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } - - diff = f2.Difference(f1) - - if diff != 0.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } - }) - - t.Run("Difference() returns one for completely different fingerprint", func(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 0, 0, 255, 255, 255}} - f2 := Fingerprint{samples: []byte{255, 255, 255, 0, 0, 0}} - - diff := f1.Difference(f2) - - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } - - diff = f2.Difference(f1) - - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } - }) - - t.Run("Difference() returns one for differently sized fingerprint", func(t *testing.T) { - f1 := Fingerprint{samples: []byte{255, 255, 255}} - f2 := Fingerprint{samples: []byte{255, 255, 255, 255}} - - diff := f1.Difference(f2) - - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } - - diff = f2.Difference(f1) - - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } - }) - - t.Run("Distance() returns componentwise absolute difference", func(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - f2 := Fingerprint{samples: []byte{1, 3, 6, 11, 146, 0}} - - dist := f1.Distance(f2) - - if dist != 286 { - t.Errorf("Distance %d doesn't match expected", dist) - } - - dist = f2.Distance(f1) - - if dist != 286 { - t.Errorf("Distance %d doesn't match expected", dist) - } - }) - - t.Run("Distance() returns max value for mismatched length", func(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 0, 0}} - f2 := Fingerprint{samples: []byte{0, 0, 0, 0}} - - dist := f1.Distance(f2) - - if dist != math.MaxUint64 { - t.Errorf("Distance %d wasn't max uint64", dist) - } - }) + t.Run("dctToFingerprint()", func(t *testing.T) { + + t.Run("produces a recursive square traversal of a square 2D matrix", func(t *testing.T) { + m := []int16{ + 0, 1, 4, 9, 16, 25, 36, 49, + 2, 3, 6, 11, 18, 27, 38, 51, + 5, 7, 8, 13, 20, 29, 40, 53, + 10, 12, 14, 15, 22, 31, 42, 55, + 17, 19, 21, 23, 24, 33, 44, 57, + 26, 28, 30, 32, 34, 35, 46, 59, + 37, 39, 41, 43, 45, 47, 48, 61, + 50, 52, 54, 56, 58, 60, 62, 63, + } - t.Run("MarshalText() serialises to packed hex string bytes", func(t *testing.T) { - f := Fingerprint{samples: []byte{0x00, 0x00, 0xFF, 0xFF}} + result := dctToFingerprint(m) - actual, err := f.MarshalText() + if expected, actual := len(m), len(result); expected != actual { + t.Fatalf("Expected result to be of length %d but got %d", expected, actual) + } - if err != nil { - t.Errorf("Error while marshalling: %s", err) - } - if string(actual) != "00ff" { - t.Errorf("Fingerprint '%s' doesn't match expected", actual) - } + for i := 0; i < len(result); i++ { + if result[i] != int16(i) { + t.Errorf("Expected element %d but got %d", i, result[i]) + } + } + }) }) - t.Run("Size() returns correct side length", func(t *testing.T) { - img := testImage() + t.Run("Difference()", func(t *testing.T) { - f := NewFingerprint(img, 3) - size := f.Size() + t.Run("returns 0.0 for an exact match", func(t *testing.T) { + f := NewFingerprintFromImage(randomImage()) - if size != 3 { - t.Errorf("Size %d doesn't match expected", size) - } + difference := f.Difference(f) - f = NewFingerprint(img, 7) - size = f.Size() + if difference > 0.00001 { + t.Errorf("Expected no difference but got %f", difference) + } + }) - if size != 7 { - t.Errorf("Size %d doesn't match expected", size) - } + t.Run("returns higher than 0.0 for different images", func(t *testing.T) { + f1 := NewFingerprintFromImage(randomImage()) + f2 := NewFingerprintFromImage(randomImage()) - f = Fingerprint{samples: make([]byte, 5*5)} - size = f.Size() + difference := f1.Difference(f2) - if size != 5 { - t.Errorf("Size %d doesn't match expected", size) - } + if difference <= 0.001 { + t.Errorf("Expected some difference but got %f", difference) + } + }) }) - t.Run("String() serialises to packed hex string", func(t *testing.T) { - f := Fingerprint{samples: []byte{ - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - }} - - actualString := fmt.Sprintf("%s", f) - - if actualString != "fffffffffffffffffffffffff0" { - t.Errorf("Fingerprint '%s' doesn't match expected", actualString) - } - }) + t.Run("FingerprintFromImage()", func(t *testing.T) { - t.Run("UnmarshalBytes() deserialises from packed bytes", func(t *testing.T) { - b := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0} + t.Run("should product correct fingerprint from DCT of white image", func(t *testing.T) { + img := image.NewNRGBA(image.Rectangle{Max: image.Point{X: 256, Y: 256}}) + for i := img.Bounds().Min.Y; i < img.Bounds().Max.Y; i++ { + for j := img.Bounds().Min.X; j < img.Bounds().Max.X; j++ { + img.Set(j, i, color.RGBA{uint8(255), uint8(255), uint8(255), 255}) + } + } - f := Fingerprint{} - f.UnmarshalBytes(b) + f := NewFingerprintFromImage(img) - if len(f.samples) != 25 { - t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) - } - for i := 0; i < 25; i++ { - if f.samples[i] != 0xF0 { - t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + if expected, actual := int16(8064>>fingerprintACShift), f[0]; actual != expected { + t.Errorf("Expected value %d but found %d at position 0", expected, actual) } - } - }) - t.Run("UnmarshalText() deserialises from packed hex string bytes", func(t *testing.T) { - text := []byte("fffffffffffffffffffffffff0") + for i := 1; i < len(f); i++ { + if expected, actual := int16(0), f[i]; actual != expected { + t.Errorf("Expected value %d but found %d at position %d", expected, actual, i) + } + } + }) + + t.Run("should product correct fingerprint from DCT of checkered image", func(t *testing.T) { + img := image.NewNRGBA(image.Rectangle{Max: image.Point{X: fingerprintDCTSideLength, Y: fingerprintDCTSideLength}}) + offset := 0 + for i := img.Bounds().Min.Y; i < img.Bounds().Max.Y; i++ { + for j := img.Bounds().Min.X; j < img.Bounds().Max.X; j++ { + if offset%2 == 0 { + img.Set(j, i, color.RGBA{uint8(255), uint8(255), uint8(255), 255}) + } else { + img.Set(j, i, color.RGBA{uint8(0), uint8(0), uint8(0), 255}) + } + offset++ + } + offset++ + } - f := Fingerprint{} - f.UnmarshalText(text) + f := NewFingerprintFromImage(img) + + expected := Fingerprint{ + -1, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 0, 2, 2, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 4, 4, 0, 0, 5, + 5, 0, 0, 7, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 12, 12, 0, 0, 14, + 14, 0, 0, 22, 22, 0, 0, 63, + } - if len(f.samples) != 25 { - t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) - } - for i := 0; i < 25; i++ { - if f.samples[i] != 0xF0 { - t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + for i := 0; i < len(expected); i++ { + if expected[i] != f[i] { + t.Errorf("Expected value %d but found %d at position %d", expected[i], f[i], i) + } } - } + }) }) - t.Run("NewFingerprint() generates binary representation", func(t *testing.T) { - f := NewFingerprint(testImage(), 3) + t.Run("Prefix()", func(t *testing.T) { - expected, _ := hex.DecodeString("3060805080a070a0c0") + t.Run("returns correct prefix for each level", func(t *testing.T) { + f := NewFingerprintFromImage(randomImage()) - expectedString := hex.EncodeToString(expected) - actualString := hex.EncodeToString(f.samples) + for level := 0; level < fingerprintDCTSideLength; level++ { + prefix := f.Prefix(level) + expectedPrefix := f[:level*level] - if expectedString != actualString { - t.Fatalf("Fingerprint '%s' doesn't match expected '%s'", actualString, expectedString) - } + if expected, actual := len(expectedPrefix), len(prefix); actual != expected { + t.Errorf("Expected length %d but got prefix of length %d", expected, actual) + + } else { + for i := 0; i < len(expectedPrefix); i++ { + if expected, actual := expectedPrefix[i], prefix[i]; actual != expected { + t.Errorf("Expected %d but got prefix value %d", expected, actual) + } + } + } + } + }) }) } diff --git a/index.go b/index.go deleted file mode 100644 index 83513aa..0000000 --- a/index.go +++ /dev/null @@ -1,116 +0,0 @@ -package simian - -import ( - "fmt" - "image" - "math" - "os" - "sort" -) - -const rootFingerprintSize = 1 - -type Index struct { - Store IndexStore - maxFingerprintSize int - maxEntryDifference float64 -} - -func (i *Index) Add(image image.Image, metadata map[string]interface{}) (key string, err error) { - entry, err := NewIndexEntry(image, i.maxFingerprintSize, metadata) - if err != nil { - return "", nil - } - - root, err := i.Store.GetRoot() - if err != nil { - return "", err - } - - var rootFingerprint Fingerprint - - _, err = root.Add(entry, rootFingerprint, rootFingerprintSize+1, i) - if err != nil { - return "", err - } - - fmt.Printf("Root node has %d children and %d entries\n", len(root.childFingerprints), len(root.entries)) - - return "", nil -} - -func (i *Index) Close() error { - return i.Store.Close() -} - -func (i *Index) FindNearest(image image.Image, maxResults int, maxDifference float64) ([]*IndexEntry, error) { - var dummy map[string]interface{} - - entry, err := NewIndexEntry(image, i.maxFingerprintSize, dummy) - if err != nil { - return nil, nil - } - - root, err := i.Store.GetRoot() - if err != nil { - return nil, err - } - - results, err := root.FindNearest(entry, rootFingerprintSize+1, i, maxResults, math.Max(maxDifference, i.maxEntryDifference)) - if err != nil { - return nil, err - } - sort.Sort(entriesByDifferenceToEntryWith(results, entry)) - - return results, err -} - -func NewIndex(path string, maxFingerprintSize int, maxEntryDifference float64) (*Index, error) { - err := os.MkdirAll(path, 0700) - if err != nil { - return nil, err - } - - indexStore, err := NewDiskIndexStore(path) - if err != nil { - return nil, err - } - - return &Index{ - Store: indexStore, - maxFingerprintSize: maxFingerprintSize, - maxEntryDifference: maxEntryDifference, - }, err -} - -type entriesByDifferenceToEntry struct { - entries []*IndexEntry - differences []float64 -} - -func (sorter *entriesByDifferenceToEntry) Len() int { - return len(sorter.entries) -} - -func (sorter *entriesByDifferenceToEntry) Less(i, j int) bool { - return sorter.differences[i] < sorter.differences[j] -} - -func (sorter *entriesByDifferenceToEntry) Swap(i, j int) { - tmpEntry := sorter.entries[i] - sorter.entries[i] = sorter.entries[j] - sorter.entries[j] = tmpEntry - - tmpDiff := sorter.differences[i] - sorter.differences[i] = sorter.differences[j] - sorter.differences[j] = tmpDiff -} - -func entriesByDifferenceToEntryWith(entries []*IndexEntry, target *IndexEntry) *entriesByDifferenceToEntry { - differences := make([]float64, len(entries), len(entries)) - for i, entry := range entries { - differences[i] = entry.MaxFingerprint.Difference(target.MaxFingerprint) - } - - return &entriesByDifferenceToEntry{entries: entries, differences: differences} -} diff --git a/indexentry.go b/indexentry.go deleted file mode 100644 index 57fe408..0000000 --- a/indexentry.go +++ /dev/null @@ -1,109 +0,0 @@ -package simian - -import ( - "encoding/json" - "image" - "image/png" - "os" - "path/filepath" - - "golang.org/x/image/draw" -) - -const keyBitLength = 256 - -type IndexEntry struct { - Thumbnail image.Image - MaxFingerprint Fingerprint - Attributes map[string]interface{} -} - -func (entry *IndexEntry) FingerprintForSize(size int) Fingerprint { - return NewFingerprint(entry.Thumbnail, size) -} - -func (entry *IndexEntry) MarshalJSON() ([]byte, error) { - return json.Marshal(&indexEntryJSON{ - MaxFingerprint: entry.MaxFingerprint.Bytes(), - Attributes: entry.Attributes, - }) -} - -func (entry *IndexEntry) UnmarshalJSON(b []byte) error { - var value indexEntryJSON - err := json.Unmarshal(b, &value) - if err != nil { - return err - } - - var fingerprint Fingerprint - err = fingerprint.UnmarshalBytes(value.MaxFingerprint) - if err != nil { - return err - } - - entry.MaxFingerprint = fingerprint - entry.Attributes = value.Attributes - - return nil -} - -func (entry *IndexEntry) loadThumbnail(path string) error { - thumbnailFile, err := os.Open(path) - if err != nil { - return err - } - defer thumbnailFile.Close() - - entry.Thumbnail, err = png.Decode(thumbnailFile) - return err -} - -func (entry *IndexEntry) saveThumbnail(path string) error { - thumbnailDir := filepath.Dir(path) - os.MkdirAll(thumbnailDir, os.FileMode(0700)) - - thumbnailOut, err := os.Create(path) - if err != nil { - return err - } - defer thumbnailOut.Close() - - pngEncoder := png.Encoder{} - return pngEncoder.Encode(thumbnailOut, entry.Thumbnail) -} - -func NewIndexEntry(image image.Image, maxFingerprintSize int, attributes map[string]interface{}) (*IndexEntry, error) { - entry := &IndexEntry{ - Thumbnail: makeThumbnail(image, maxFingerprintSize*2), - Attributes: attributes, - } - - entry.MaxFingerprint = entry.FingerprintForSize(maxFingerprintSize) - - return entry, nil -} - -func makeThumbnail(src image.Image, size int) image.Image { - width := float64(src.Bounds().Max.X - src.Bounds().Min.X) - height := float64(src.Bounds().Max.Y - src.Bounds().Min.Y) - target := float64(size) - - if width > height { - width /= height / target - height = target - } else { - height /= width / target - width = target - } - - thumbnail := image.NewNRGBA(image.Rect(0, 0, int(width), int(height))) - draw.BiLinear.Scale(thumbnail, thumbnail.Bounds(), src, src.Bounds(), draw.Src, nil) - - return thumbnail -} - -type indexEntryJSON struct { - MaxFingerprint []byte `json:"maxFingerprint"` - Attributes map[string]interface{} `json:"attributes"` -} diff --git a/indexentry_test.go b/indexentry_test.go deleted file mode 100644 index aae46e3..0000000 --- a/indexentry_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package simian - -import ( - "encoding/json" - "reflect" - "testing" -) - -func TestIndexEntry(t *testing.T) { - - t.Run("JSON serialisation", func(t *testing.T) { - - t.Run("should roundtrip all fields", func(t *testing.T) { - - entry := &IndexEntry{ - MaxFingerprint: Fingerprint{samples: []uint8{0xF0, 0xF0, 0xF0, 0xF0}}, - Attributes: make(map[string]interface{}), - } - entry.Attributes["some key"] = "some value" - entry.Attributes["some other key"] = "some other value" - - jsonBytes, err := json.Marshal(entry) - if err != nil { - t.Fatalf("Error marshalling JSON: %v", err) - } - - var result *IndexEntry - err = json.Unmarshal(jsonBytes, &result) - if err != nil { - t.Fatalf("Error unmarshalling JSON: %v", err) - } - - if distance := result.MaxFingerprint.Distance(entry.MaxFingerprint); distance != 0 { - t.Errorf("Expected no difference in fingerprints but got %d", distance) - } - if !reflect.DeepEqual(entry.Attributes, result.Attributes) { - t.Errorf("Expected attributes to match but got %v", result.Attributes) - } - }) - }) -} diff --git a/indexnode.go b/indexnode.go deleted file mode 100644 index 7b572e7..0000000 --- a/indexnode.go +++ /dev/null @@ -1,270 +0,0 @@ -package simian - -import ( - "encoding/json" - "errors" - "fmt" - "math" - "sort" -) - -var errResultLimitReached = errors.New("result limit reached") - -type IndexNode struct { - childFingerprints []Fingerprint - childFingerprintsByString map[string]*Fingerprint - entries []*IndexEntry -} - -func (node *IndexNode) Add(entry *IndexEntry, nodeFingerprint Fingerprint, childFingerprintSize int, index *Index) (*IndexNode, error) { - - fmt.Printf("Node Add %d\n", childFingerprintSize) - - childFingerprint := entry.FingerprintForSize(childFingerprintSize) - - if len(node.childFingerprints) == 0 { - - // We can go deeper and this new entry is sufficiently different to - // the rest, so split this leaf node by turning entries into children. - fmt.Printf("Max Diff: %f\n", node.maxChildDifferenceTo(entry.MaxFingerprint)) - if childFingerprintSize < index.maxFingerprintSize && node.maxChildDifferenceTo(entry.MaxFingerprint) > index.maxEntryDifference { - fmt.Printf("Pushing %d entries to children\n", len(node.entries)) - node.pushEntriesToChildren(nodeFingerprint, childFingerprintSize, index.Store) - fmt.Printf("Done pushing entries to children\n") - - } else { - fmt.Printf("Adding entry %s\n", nodeFingerprint.String()) - err := index.Store.AddEntry(entry, node, nodeFingerprint) - if err != nil { - return nil, err - } - fmt.Printf("Added entry\n") - return node, nil - } - } - - child, err := index.Store.GetOrCreateChild(childFingerprint, node, nodeFingerprint) - if err != nil { - return nil, err - } - - return child.Add(entry, childFingerprint, childFingerprintSize+1, index) -} - -func (node *IndexNode) FindNearest(entry *IndexEntry, childFingerprintSize int, index *Index, maxResults int, maxDifference float64) ([]*IndexEntry, error) { - results := make([]*IndexEntry, 0, maxResults) - - err := node.gatherNearest(entry, childFingerprintSize, index, maxDifference, &results) - if err != nil && err != errResultLimitReached { - return nil, err - } - - return results, nil -} - -func (node *IndexNode) MarshalJSON() ([]byte, error) { - return json.Marshal(&indexNodeJSON{ - ChildFingerprints: node.childFingerprints, - Entries: node.entries, - }) -} - -func (node *IndexNode) UnmarshalJSON(b []byte) error { - var value indexNodeJSON - err := json.Unmarshal(b, &value) - if err != nil { - return err - } - - node.childFingerprints = value.ChildFingerprints - - node.childFingerprintsByString = make(map[string]*Fingerprint) - for i := 0; i < len(node.childFingerprints); i++ { - f := &node.childFingerprints[i] - node.childFingerprintsByString[f.String()] = f - } - - node.entries = value.Entries - - return nil -} - -func (node *IndexNode) addSimilarEntriesTo(entries *[]*IndexEntry, fingerprint Fingerprint, maxDifference float64) error { - fmt.Printf("addSimilarEntriesTo\n") - - return node.withEachEntry(func(entry *IndexEntry) error { - if len(*entries) >= cap(*entries) { - fmt.Printf("Max results hit\n") - return errResultLimitReached - } - - diff := entry.MaxFingerprint.Difference(fingerprint) - if diff <= maxDifference { - fmt.Printf("Found %d of difference %f\n", len(*entries), diff) - *entries = append(*entries, entry) - } else { - fmt.Printf("Max difference hit at %f\n", diff) - return errResultLimitReached - } - - return nil - }) -} - -func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int, index *Index, maxDifference float64, results *[]*IndexEntry) error { - - fmt.Printf("%d gatherNearest %d\n", childFingerprintSize, len(node.entries)) - - // Check for an exact matching child - childFingerprint := entry.FingerprintForSize(childFingerprintSize) - exactChildFingerprint, exactChildFingerprintExists := node.childFingerprintsByString[childFingerprint.String()] - - var exactChildFingerprintString string - var exactChild *IndexNode - if exactChildFingerprintExists { - exactChildFingerprintString = exactChildFingerprint.String() - - var err error - exactChild, err = index.Store.GetChild(childFingerprint, node) - if err != nil { - return err - } - } - - // One exists - recursively search it - if exactChild != nil { - err := exactChild.gatherNearest(entry, childFingerprintSize+1, index, maxDifference, results) - if err != nil { - return err - } - - err = exactChild.addSimilarEntriesTo(results, entry.MaxFingerprint, maxDifference) - if err != nil { - return err - } - } - - childFingerprints := make([]Fingerprint, len(node.childFingerprints)) - copy(childFingerprints, node.childFingerprints) - - // Need more results - find and sort all children by nearness - sort.Sort(nodesByDifferenceToFingerprintWith(childFingerprints, childFingerprint)) - - // fmt.Printf("Sorting %d children...\n", len(children)) - // for i, child := range children { - // diff := child.fingerprint.Difference(entryFingerprint) - // fmt.Printf("%d sorted child %d of %f (%d %d)\n", childFingerprintSize+1, i, diff, len(child.fingerprint.samples), len(entryFingerprint.samples)) - // } - - // Recursively gather from nearest children - for i, cf := range childFingerprints { - fmt.Printf("Visiting child %d\n", i) - if exactChildFingerprintExists && cf.String() == exactChildFingerprintString { - continue - } - - childNode, err := index.Store.GetChild(cf, node) - if err != nil { - return err - } - - err = childNode.gatherNearest(entry, childFingerprintSize+1, index, maxDifference, results) - if err != nil { - return err - } - - err = childNode.addSimilarEntriesTo(results, entry.MaxFingerprint, maxDifference) - if err != nil { - return err - } - } - - return nil -} - -func (node *IndexNode) maxChildDifferenceTo(f Fingerprint) float64 { - maxDifference := 0.0 - - node.withEachEntry(func(entry *IndexEntry) error { - diff := entry.MaxFingerprint.Difference(f) - maxDifference = math.Max(diff, maxDifference) - return nil - }) - - return maxDifference -} - -func (node *IndexNode) pushEntriesToChildren(nodeFingerprint Fingerprint, childFingerprintSize int, store IndexStore) error { - node.withEachEntry(func(entry *IndexEntry) error { - childFingerprint := entry.FingerprintForSize(childFingerprintSize) - child, err := store.GetOrCreateChild(childFingerprint, node, nodeFingerprint) - if err != nil { - return err - } - fmt.Printf("Pushing entry to child\n") - return store.AddEntry(entry, child, childFingerprint) - }) - - return store.RemoveEntries(node, nodeFingerprint) -} - -func (node *IndexNode) registerChild(childFingerprint Fingerprint) { - node.childFingerprints = append(node.childFingerprints, childFingerprint) - node.childFingerprintsByString[childFingerprint.String()] = &node.childFingerprints[len(node.childFingerprints)-1] -} - -func (node *IndexNode) registerEntry(entry *IndexEntry) { - node.entries = append(node.entries, entry) -} - -func (node *IndexNode) removeEntries() { - node.entries = nil -} - -func (node *IndexNode) withEachEntry(action func(*IndexEntry) error) error { - for _, entry := range node.entries { - err := action(entry) - if err != nil { - return err - } - } - - return nil -} - -type indexNodeJSON struct { - ChildFingerprints []Fingerprint `json:"childFingerprints"` - Entries []*IndexEntry `json:"entries"` -} - -type nodesByDifferenceToFingerprint struct { - nodeFingerprints []Fingerprint - differences []float64 -} - -func (sorter *nodesByDifferenceToFingerprint) Len() int { - return len(sorter.nodeFingerprints) -} - -func (sorter *nodesByDifferenceToFingerprint) Less(i, j int) bool { - return sorter.differences[i] < sorter.differences[j] -} - -func (sorter *nodesByDifferenceToFingerprint) Swap(i, j int) { - tmp := sorter.nodeFingerprints[i] - sorter.nodeFingerprints[i] = sorter.nodeFingerprints[j] - sorter.nodeFingerprints[j] = tmp - - tmpDiff := sorter.differences[i] - sorter.differences[i] = sorter.differences[j] - sorter.differences[j] = tmpDiff -} - -func nodesByDifferenceToFingerprintWith(nodeFingerprints []Fingerprint, f Fingerprint) *nodesByDifferenceToFingerprint { - differences := make([]float64, len(nodeFingerprints), len(nodeFingerprints)) - for i, nf := range nodeFingerprints { - differences[i] = nf.Difference(f) - } - - return &nodesByDifferenceToFingerprint{nodeFingerprints: nodeFingerprints, differences: differences} -} diff --git a/indexnode_test.go b/indexnode_test.go deleted file mode 100644 index 75ee02b..0000000 --- a/indexnode_test.go +++ /dev/null @@ -1,89 +0,0 @@ -package simian - -import ( - "encoding/json" - "testing" -) - -func TestIndexNode(t *testing.T) { - - t.Run("JSON serialisation", func(t *testing.T) { - - t.Run("should roundtrip all fields", func(t *testing.T) { - n := &IndexNode{ - childFingerprintsByString: make(map[string]*Fingerprint), - } - - n.registerChild(Fingerprint{samples: []uint8{0x10, 0x20, 0x30, 0x40}}) - n.registerChild(Fingerprint{samples: []uint8{0x50, 0x60, 0x70, 0x80}}) - - entry1 := &IndexEntry{ - MaxFingerprint: Fingerprint{samples: []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9}}, - Attributes: make(map[string]interface{}), - } - n.registerEntry(entry1) - - entry2 := &IndexEntry{ - MaxFingerprint: Fingerprint{samples: []uint8{10, 11, 12, 13, 14, 15, 16, 17, 18}}, - Attributes: make(map[string]interface{}), - } - n.registerEntry(entry2) - - jsonBytes, err := json.Marshal(n) - if err != nil { - t.Fatalf("Error marshalling JSON: %v", err) - } - - var result *IndexNode - err = json.Unmarshal(jsonBytes, &result) - if err != nil { - t.Fatalf("Error unmarshalling JSON: %v", err) - } - - if actual, expected := len(result.childFingerprints), len(n.childFingerprints); actual != expected { - t.Fatalf("Expected %d child fingerprints but got %d", expected, actual) - } - for i := 0; i < len(result.childFingerprints); i++ { - actual := result.childFingerprints[i].String() - expected := n.childFingerprints[i].String() - - if actual != expected { - t.Errorf("Expected fingerprint '%s' but got '%s'", expected, actual) - } - } - - if actual, expected := len(result.childFingerprintsByString), len(n.childFingerprintsByString); actual != expected { - t.Fatalf("Expected %d child fingerprints mapped by string but got %d", expected, actual) - } - for k, v := range n.childFingerprintsByString { - actual := result.childFingerprintsByString[k].String() - expected := v.String() - - if actual != expected { - t.Errorf("Expected fingerprint '%s' but got '%s'", expected, actual) - } - } - - if actual, expected := len(result.entries), len(n.entries); actual != expected { - t.Fatalf("Expected %d entries but got %d", expected, actual) - } - for i := 0; i < len(result.entries); i++ { - actualBytes, err := json.Marshal(result.entries[i]) - if err != nil { - t.Fatalf("Error marshalling entry: %v", err) - } - actual := string(actualBytes) - - expectedBytes, err := json.Marshal(n.entries[i]) - if err != nil { - t.Fatalf("Error marshalling entry: %v", err) - } - expected := string(expectedBytes) - - if actual != expected { - t.Errorf("Expected entry '%s' but got '%s'", expected, actual) - } - } - }) - }) -} diff --git a/indexstore.go b/indexstore.go deleted file mode 100644 index 7df2d02..0000000 --- a/indexstore.go +++ /dev/null @@ -1,10 +0,0 @@ -package simian - -type IndexStore interface { - AddEntry(entry *IndexEntry, node *IndexNode, nodeFingerprint Fingerprint) error - Close() error - GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) - GetOrCreateChild(f Fingerprint, parent *IndexNode, parentFingerprint Fingerprint) (*IndexNode, error) - GetRoot() (*IndexNode, error) - RemoveEntries(node *IndexNode, nodeFingerprint Fingerprint) error -} diff --git a/math.go b/math.go new file mode 100644 index 0000000..c7025e8 --- /dev/null +++ b/math.go @@ -0,0 +1,32 @@ +package simian + +import ( + "math" +) + +func DCT(width int, height int, values []int8) (result []int16) { + + doubleWidth := 2.0 * float64(width) + doubleHeight := 2.0 * float64(height) + + result = make([]int16, len(values)) + + for u := 0; u < height; u++ { + for v := 0; v < width; v++ { + sum := 0.0 + + for i := 0; i < height; i++ { + for j := 0; j < width; j++ { + + sum += float64(values[i*width+j]) * + math.Cos(((math.Pi*float64(u))/doubleHeight)*(2*float64(i)+1)) * + math.Cos(((math.Pi*float64(v))/doubleWidth)*(2*float64(j)+1)) + } + } + + result[u*width+v] = int16(sum) + } + } + + return +} diff --git a/simian-compare/main.go b/simian-compare/main.go new file mode 100644 index 0000000..eda4372 --- /dev/null +++ b/simian-compare/main.go @@ -0,0 +1,65 @@ +package main + +import ( + "fmt" + "image" + _ "image/jpeg" + _ "image/png" + "os" + + "github.com/mandykoh/simian" +) + +func makeFingerprintFromImageFile(imageFileName string) (f *simian.Fingerprint, err error) { + var imageFile *os.File + imageFile, err = os.Open(imageFileName) + if err != nil { + return + } + defer imageFile.Close() + + var img image.Image + img, _, err = image.Decode(imageFile) + if err != nil { + return + } + + return simian.NewFingerprintFromImage(img), nil +} + +func main() { + if len(os.Args) < 3 { + fmt.Printf("Usage: simian-compare \n") + return + } + + fingerprint1, err := makeFingerprintFromImageFile(os.Args[1]) + if err != nil { + fmt.Printf("Error: %s\n", err) + return + } + + fingerprint2, err := makeFingerprintFromImageFile(os.Args[2]) + if err != nil { + fmt.Printf("Error: %s\n", err) + return + } + + difference := fingerprint1.Difference(fingerprint2) + + var judgment string + switch { + case difference < 0.05: + judgment = "duplicate" + case difference < 0.1: + judgment = "variation" + case difference < 0.2: + judgment = "similar" + case difference < 0.3: + judgment = "tonally/texturally similar" + default: + judgment = "different" + } + + fmt.Printf("%.4f (%s)\n", difference, judgment) +} diff --git a/simian-fingerprint/main.go b/simian-fingerprint/main.go new file mode 100644 index 0000000..7804acf --- /dev/null +++ b/simian-fingerprint/main.go @@ -0,0 +1,38 @@ +package main + +import ( + "fmt" + "image" + _ "image/jpeg" + _ "image/png" + "os" + + "github.com/mandykoh/simian" +) + +func main() { + if len(os.Args) < 2 { + fmt.Printf("Usage: simian-fingerprint \n") + return + } + + imageFile, err := os.Open(os.Args[1]) + if err != nil { + fmt.Printf("Error: %s\n", err) + return + } + defer imageFile.Close() + + img, _, err := image.Decode(imageFile) + if err != nil { + fmt.Printf("Error: %s\n", err) + return + } + + fingerprint := simian.NewFingerprintFromImage(img) + + for i := 0; i < len(fingerprint); i++ { + fmt.Printf("%02x", fingerprint[i]+128) + } + fmt.Println() +} diff --git a/vendor/vendor.json b/vendor/vendor.json index ba70d97..ddb0bbf 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -15,7 +15,7 @@ "revisionTime": "2017-06-17T12:17:10Z" }, { - "checksumSHA1": "7E3Y1HU/UbsQF/dxMRdjFmx9QDQ=", + "checksumSHA1": "4+1dxGgXahv2izGbBxftVoQjoXI=", "path": "golang.org/x/image/draw", "revision": "83686c547965220f8b5d75e83ddc67d73420a89f", "revisionTime": "2017-01-15T09:09:03Z"