From d11404f254ba91895753e1a8ea6344143b15a371 Mon Sep 17 00:00:00 2001 From: Marc Rasi Date: Fri, 13 Nov 2020 10:05:24 -0800 Subject: [PATCH 01/10] make the evaluation data codable --- Examples/BeeTrackingTool/main.swift | 37 ++------ Sources/BeeTracking/TrackingMetrics.swift | 95 ++++++++++++++----- Sources/SwiftFusion/Core/VectorN.swift | 24 ++--- Sources/SwiftFusion/Core/VectorN.swift.gyb | 2 +- Sources/SwiftFusion/Geometry/Pose2.swift | 4 +- Sources/SwiftFusion/Geometry/Rot2.swift | 4 +- .../Image/OrientedBoundingBox.swift | 2 +- 7 files changed, 98 insertions(+), 70 deletions(-) diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index 92165619..21312938 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -162,36 +162,15 @@ struct InferTrackRAE: ParsableCommand { /// Infers a track on a VOT video, using the raw pixel tracker. struct InferTrackRawPixels: ParsableCommand { - @Option(help: "Base directory of the VOT dataset") - var votBaseDirectory: String - - @Option(help: "Name of the VOT video to use") - var videoName: String - - @Option(help: "How many frames to track") - var frameCount: Int = 50 - - @Flag(help: "Print progress information") - var verbose: Bool = false - func run() { - let video = VOTVideo(votBaseDirectory: votBaseDirectory, videoName: videoName)! - let videoSlice = video[0.. OrientedBoundingBox in - let poseID = frameVariableIDs.head - return OrientedBoundingBox( - center: prediction[poseID], rows: video.track[0].rows, cols: video.track[0].cols) - } + let dataset = OISTBeeVideo()! + // Only do inference on the interesting tracks. + dataset.tracks = [3, 5, 6, 7].map { dataset.tracks[$0] } + let trackerEvaluationDataset = TrackerEvaluationDataset(dataset) + let eval = trackerEvaluationDataset.evaluate( + rawPixelTracker, sequenceCount: dataset.tracks.count, deltaAnchor: 100, outputFile: "rawpixel.json") + print(eval.trackerMetrics.accuracy) + print(eval.trackerMetrics.robustness) } } diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift index 70ec46c3..1916f9f5 100644 --- a/Sources/BeeTracking/TrackingMetrics.swift +++ b/Sources/BeeTracking/TrackingMetrics.swift @@ -1,4 +1,9 @@ import BeeDataset +<<<<<<< HEAD +======= +import Foundation +import PenguinParallelWithFoundation +>>>>>>> bb5b886 (make the evaluation data codable) import PythonKit import SwiftFusion import TensorFlow @@ -6,7 +11,7 @@ import TensorFlow /// Accuracy and robustness for a subsequence, as defined in the VOT 2020 challenge [1]. /// /// [1] http://prints.vicos.si/publications/384 -public struct SubsequenceMetrics { +public struct SubsequenceMetrics: Codable { /// Accuracy as defined in [1], equation (1). public let accuracy: Double @@ -82,7 +87,7 @@ public struct SubsequenceMetrics { } /// Accuracy and robustness for a sequence, as defined in [1]. -public struct SequenceMetrics { +public struct SequenceMetrics: Codable { /// Accuracy as defined in [1], equation (3). public let accuracy: Double @@ -118,7 +123,7 @@ public struct SequenceMetrics { } /// Accuracy and robustness for a tracker, as defined in [1]. -public struct TrackerMetrics { +public struct TrackerMetrics: Codable { /// Accuracy as defined in [1], equation (5). public let accuracy: Double @@ -134,7 +139,7 @@ public struct TrackerMetrics { } /// Expected Averge Overlap (EAO) for a tracker, as defined in [1]. -public struct ExpectedAverageOverlap { +public struct ExpectedAverageOverlap: Codable { /// The values in the EAO curve. public let curve: [Double] @@ -167,17 +172,33 @@ extension TrackerEvaluationDataset { /// Evaluate the performance of `tracker` on `self`. /// /// Prameter sequenceCount: How many sequences from `self` to use during the evaluation. - public func evaluate(_ tracker: Tracker, sequenceCount: Int) -> TrackerEvaluationResults { + public func evaluate( + _ tracker: Tracker, + sequenceCount: Int, + deltaAnchor: Int, + outputFile: String + ) -> TrackerEvaluationResults { let sequenceEvaluations = sequences.prefix(sequenceCount).enumerated().map { (i, sequence) -> SequenceEvaluationResults in +<<<<<<< HEAD print("Evaluating sequence \(i + 1) of \(sequences.count)") return sequence.evaluate(tracker) +======= + print("Evaluating sequence \(i + 1) of \(sequenceCount)") + return sequence.evaluate(tracker, deltaAnchor: deltaAnchor, outputFile: "\(outputFile)-sequence\(i)") +>>>>>>> bb5b886 (make the evaluation data codable) } - return TrackerEvaluationResults( + let result = TrackerEvaluationResults( sequences: sequenceEvaluations, trackerMetrics: TrackerMetrics(sequenceEvaluations.map { $0.sequenceMetrics }), expectedAverageOverlap: ExpectedAverageOverlap( sequenceEvaluations.flatMap { $0.subsequences }.map { $0.metrics })) + + let encoder = JSONEncoder() + let data = try! encoder.encode(result) + FileManager.default.createFile(atPath: "\(outputFile).json", contents: data, attributes: nil) + + return result } } @@ -200,31 +221,54 @@ public struct TrackerEvaluationSequence { extension TrackerEvaluationSequence { /// Returns the performance of `tracker` on the sequence `self`. +<<<<<<< HEAD public func evaluate(_ tracker: Tracker) -> SequenceEvaluationResults { let subsequences = self.subsequences(deltaAnchor: 50) let subsequenceEvaluations = subsequences.enumerated().map { (i, subsequence) -> (metrics: SubsequenceMetrics, prediction: [OrientedBoundingBox]) in print("Evaluating subsequence \(i + 1) of \(subsequences.count)") return subsequence.evaluateSubsequence(tracker) - } - return SequenceEvaluationResults( - subsequences: subsequenceEvaluations, - sequenceMetrics: SequenceMetrics(subsequenceEvaluations.map { $0.metrics })) - } - - /// Returns the performance of `tracker` on the subsequence `self`. - public func evaluateSubsequence(_ tracker: Tracker) - -> (metrics: SubsequenceMetrics, prediction: [OrientedBoundingBox]) - { +======= + public func evaluate(_ tracker: Tracker, deltaAnchor: Int, outputFile: String) -> SequenceEvaluationResults { guard let _ = try? Python.attemptImport("shapely") else { print("python shapely library must be installed") preconditionFailure() } - let prediction = tracker(frames, groundTruth[0]) - return ( - metrics: SubsequenceMetrics(groundTruth: groundTruth, prediction: prediction), - prediction: prediction) + let subsequences = self.subsequences(deltaAnchor: deltaAnchor) + let subsequencePredictions = [[OrientedBoundingBox]]( + unsafeUninitializedCapacity: subsequences.count + ) { (buf, actualCount) in + let blockCount = 4 + ComputeThreadPools.local.parallelFor(n: blockCount) { (blockIndex, _) in + for i in 0..>>>>>> bb5b886 (make the evaluation data codable) + } + let result = SequenceEvaluationResults( + subsequences: subsequenceEvaluations, + sequenceMetrics: SequenceMetrics(subsequenceEvaluations.map { $0.metrics })) + + let encoder = JSONEncoder() + let data = try! encoder.encode(result) + FileManager.default.createFile(atPath: "\(outputFile).json", contents: data, attributes: nil) + + return result } } @@ -246,7 +290,7 @@ extension TrackerEvaluationDataset { } /// All the tracker evaluation metrics in one struct. -public struct TrackerEvaluationResults { +public struct TrackerEvaluationResults: Codable { /// The sequence results for all the sequences in the dataset. public let sequences: [SequenceEvaluationResults] @@ -258,14 +302,19 @@ public struct TrackerEvaluationResults { } /// All the sequence evaluation metrics in one struct. -public struct SequenceEvaluationResults { +public struct SequenceEvaluationResults: Codable { /// The subsequence metrics for all subsequences in this sequence. And the predictions. - public let subsequences: [(metrics: SubsequenceMetrics, prediction: [OrientedBoundingBox])] + public let subsequences: [SubsequenceEvaluationResults] /// The sequence metrics for this sequence. public let sequenceMetrics: SequenceMetrics } +public struct SubsequenceEvaluationResults: Codable { + public let metrics: SubsequenceMetrics + public let prediction: [OrientedBoundingBox] +} + /// Given `frames` and a `start` region containing an object to track, returns predicted regions /// for all `frames` (including the first one). public typealias Tracker = diff --git a/Sources/SwiftFusion/Core/VectorN.swift b/Sources/SwiftFusion/Core/VectorN.swift index 0a6b8449..86a79d78 100644 --- a/Sources/SwiftFusion/Core/VectorN.swift +++ b/Sources/SwiftFusion/Core/VectorN.swift @@ -5,7 +5,7 @@ import TensorFlow /// An element of R^1, with Euclidean inner product. -public struct Vector1: KeyPathIterable { +public struct Vector1: Codable, KeyPathIterable { @differentiable public var x: Double @differentiable @@ -94,7 +94,7 @@ extension Vector1: ElementaryFunctions {} /// An element of R^2, with Euclidean inner product. -public struct Vector2: KeyPathIterable { +public struct Vector2: Codable, KeyPathIterable { @differentiable public var x: Double @differentiable public var y: Double @@ -191,7 +191,7 @@ extension Vector2: ElementaryFunctions {} /// An element of R^3, with Euclidean inner product. -public struct Vector3: KeyPathIterable { +public struct Vector3: Codable, KeyPathIterable { @differentiable public var x: Double @differentiable public var y: Double @differentiable public var z: Double @@ -296,7 +296,7 @@ extension Vector3: ElementaryFunctions {} /// An element of R^4, with Euclidean inner product. -public struct Vector4: KeyPathIterable { +public struct Vector4: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -409,7 +409,7 @@ extension Vector4: ElementaryFunctions {} /// An element of R^5, with Euclidean inner product. -public struct Vector5: KeyPathIterable { +public struct Vector5: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -530,7 +530,7 @@ extension Vector5: ElementaryFunctions {} /// An element of R^6, with Euclidean inner product. -public struct Vector6: KeyPathIterable { +public struct Vector6: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -659,7 +659,7 @@ extension Vector6: ElementaryFunctions {} /// An element of R^7, with Euclidean inner product. -public struct Vector7: KeyPathIterable { +public struct Vector7: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -796,7 +796,7 @@ extension Vector7: ElementaryFunctions {} /// An element of R^8, with Euclidean inner product. -public struct Vector8: KeyPathIterable { +public struct Vector8: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -941,7 +941,7 @@ extension Vector8: ElementaryFunctions {} /// An element of R^9, with Euclidean inner product. -public struct Vector9: KeyPathIterable { +public struct Vector9: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -1094,7 +1094,7 @@ extension Vector9: ElementaryFunctions {} /// An element of R^10, with Euclidean inner product. -public struct Vector10: KeyPathIterable { +public struct Vector10: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -1255,7 +1255,7 @@ extension Vector10: ElementaryFunctions {} /// An element of R^11, with Euclidean inner product. -public struct Vector11: KeyPathIterable { +public struct Vector11: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double @@ -1424,7 +1424,7 @@ extension Vector11: ElementaryFunctions {} /// An element of R^12, with Euclidean inner product. -public struct Vector12: KeyPathIterable { +public struct Vector12: Codable, KeyPathIterable { @differentiable public var s0: Double @differentiable public var s1: Double @differentiable public var s2: Double diff --git a/Sources/SwiftFusion/Core/VectorN.swift.gyb b/Sources/SwiftFusion/Core/VectorN.swift.gyb index 6fd4c230..181e5dbf 100644 --- a/Sources/SwiftFusion/Core/VectorN.swift.gyb +++ b/Sources/SwiftFusion/Core/VectorN.swift.gyb @@ -9,7 +9,7 @@ import TensorFlow % end /// An element of R^${dim}, with Euclidean inner product. -public struct Vector${dim}: KeyPathIterable { +public struct Vector${dim}: Codable, KeyPathIterable { % for coordinate in coordinates: @differentiable public var ${coordinate}: Double % end diff --git a/Sources/SwiftFusion/Geometry/Pose2.swift b/Sources/SwiftFusion/Geometry/Pose2.swift index bc3c221d..03bbb281 100644 --- a/Sources/SwiftFusion/Geometry/Pose2.swift +++ b/Sources/SwiftFusion/Geometry/Pose2.swift @@ -35,7 +35,7 @@ import TensorFlow /// [2]: https://github.com/borglab/gtsam/blob/develop/doc/math.pdf /// [3]: Actually, we define the pullbacks because Swift doesn't support differentials very well /// yet. -public struct Pose2: LieGroup, Equatable, KeyPathIterable { +public struct Pose2: Codable, LieGroup, Equatable, KeyPathIterable { public typealias TangentVector = Vector3 // MARK: - Manifold conformance @@ -92,7 +92,7 @@ extension Pose2 { // MARK: - Global Coordinate System -public struct Pose2Coordinate: Equatable, KeyPathIterable { +public struct Pose2Coordinate: Codable, Equatable, KeyPathIterable { var t: Vector2 var rot: Rot2 } diff --git a/Sources/SwiftFusion/Geometry/Rot2.swift b/Sources/SwiftFusion/Geometry/Rot2.swift index 13aff17e..03377634 100644 --- a/Sources/SwiftFusion/Geometry/Rot2.swift +++ b/Sources/SwiftFusion/Geometry/Rot2.swift @@ -2,7 +2,7 @@ import TensorFlow /// Rot2 class is the Swift type for the SO(2) manifold of 2D Rotations around /// the origin. -public struct Rot2: Manifold, LieGroup, Equatable, KeyPathIterable { +public struct Rot2: Codable, Manifold, LieGroup, Equatable, KeyPathIterable { // MARK: - Manifold conformance @@ -81,7 +81,7 @@ extension Rot2 { // MARK: - Global coordinate system -public struct Rot2Coordinate: Equatable, KeyPathIterable { +public struct Rot2Coordinate: Codable, Equatable, KeyPathIterable { public var c, s: Double } diff --git a/Sources/SwiftFusion/Image/OrientedBoundingBox.swift b/Sources/SwiftFusion/Image/OrientedBoundingBox.swift index 23c15220..26354cf8 100644 --- a/Sources/SwiftFusion/Image/OrientedBoundingBox.swift +++ b/Sources/SwiftFusion/Image/OrientedBoundingBox.swift @@ -15,7 +15,7 @@ import PythonKit /// A rectangular region of an image, not necessarily axis-aligned. -public struct OrientedBoundingBox: Differentiable { +public struct OrientedBoundingBox: Codable, Differentiable { /// The pose of the region's center within the image. /// /// The translation component is in `(u, v)` coordinates as defined in `docs/ImageOperations.md`. From 476ff1b73517fba609c73044b6b9fcfacace7ae5 Mon Sep 17 00:00:00 2001 From: Marc Rasi Date: Fri, 13 Nov 2020 14:58:59 -0800 Subject: [PATCH 02/10] running experiments --- Examples/BeeTrackingTool/main.swift | 196 +++++++++++++++++- Examples/OISTVisualizationTool/main.swift | 127 +++++------- Sources/BeeDataset/OISTBeeVideo.swift | 5 + .../BeeTracking/OISTBeeVideo+Batches.swift | 12 +- Sources/BeeTracking/TrackingFactorGraph.swift | 62 ++++++ Sources/BeeTracking/TrackingMetrics.swift | 17 -- Sources/SwiftFusion/Core/Timer.swift | 26 +-- .../ProbablisticTrackingFactor.swift | 2 +- 8 files changed, 335 insertions(+), 112 deletions(-) diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index 21312938..3d5a2683 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -9,7 +9,7 @@ import TensorFlow struct BeeTrackingTool: ParsableCommand { static var configuration = CommandConfiguration( - subcommands: [TrainRAE.self, InferTrackRAE.self, InferTrackRawPixels.self]) + subcommands: [TrainRAE.self, InferTrackRAE.self, InferTrackRawPixels.self, NaiveRae.self]) } /// The dimension of the hidden layer in the appearance model. @@ -163,7 +163,18 @@ struct InferTrackRAE: ParsableCommand { /// Infers a track on a VOT video, using the raw pixel tracker. struct InferTrackRawPixels: ParsableCommand { func run() { - let dataset = OISTBeeVideo()! + + func rawPixelTracker(_ frames: [Tensor], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] { + var tracker = makeRawPixelTracker(frames: frames, target: frames[0].patch(at: start)) + tracker.optimizer.precision = 1e0 + let prediction = tracker.infer(knownStart: Tuple1(start.center)) + return tracker.frameVariableIDs.map { varIds in + let poseId = varIds.head + return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols) + } + } + + var dataset = OISTBeeVideo()! // Only do inference on the interesting tracks. dataset.tracks = [3, 5, 6, 7].map { dataset.tracks[$0] } let trackerEvaluationDataset = TrackerEvaluationDataset(dataset) @@ -174,9 +185,190 @@ struct InferTrackRawPixels: ParsableCommand { } } +/// Tracking with a Naive Bayes with RAE +struct NaiveRae: ParsableCommand { + @Option(help: "Where to load the RAE weights") + var loadWeights: String + + @Option(help: "The dimension of the latent code in the RAE appearance model") + var kLatentDimension: Int + + @Option(help: "The dimension of the hidden code in the RAE appearance model") + var kHiddenDimension = 100 + + @Flag + var verbose: Bool = false + + /// Returns predictions for `videoName` using the raw pixel tracker. + func naiveRaeTrack(dataset dataset_: OISTBeeVideo) { + var dataset = dataset_ + dataset.labels = dataset.labels.map { + $0.filter({ $0.label == .Body }) + } + // Make batch and do RAE + let (batch, _) = dataset.makeBatch(appearanceModelSize: (40, 70), batchSize: 200) + var statistics = FrameStatistics(batch) + statistics.mean = Tensor(62.26806976644069) + statistics.standardDeviation = Tensor(37.44683834503672) + + let backgroundBatch = dataset.makeBackgroundBatch( + patchSize: (40, 70), appearanceModelSize: (40, 70), + statistics: statistics, + batchSize: 300 + ) + + let (imageHeight, imageWidth, imageChannels) = + (batch.shape[1], batch.shape[2], batch.shape[3]) + + if verbose { print("Loading RAE model, \(batch.shape)...") } + + let np = Python.import("numpy") + + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: kLatentDimension + ) + rae.load(weights: np.load(loadWeights, allow_pickle: true)) + + if verbose { print("Fitting Naive Bayes model") } + + var (foregroundModel, backgroundModel) = ( + MultivariateGaussian( + dims: TensorShape([kLatentDimension]), + regularizer: 1e-3 + ), GaussianNB( + dims: TensorShape([kLatentDimension]), + regularizer: 1e-3 + ) + ) + + let batchPositive = rae.encode(batch) + foregroundModel.fit(batchPositive) + + let batchNegative = rae.encode(backgroundBatch) + backgroundModel.fit(batchNegative) + + if verbose { + print("Foreground: \(foregroundModel)") + print("Background: \(backgroundModel)") + } + + func tracker(_ frames: [Tensor], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] { + var tracker = makeNaiveBayesRAETracker( + model: rae, + statistics: statistics, + frames: frames, + targetSize: (start.rows, start.cols), + foregroundModel: foregroundModel, backgroundModel: backgroundModel + ) + tracker.optimizer.cgls_precision = 1e-9 + tracker.optimizer.precision = 1e-6 + tracker.optimizer.max_iteration = 200 + let prediction = tracker.infer(knownStart: Tuple1(start.center)) + return tracker.frameVariableIDs.map { varIds in + let poseId = varIds.head + return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols) + } + } + + // Only do inference on the interesting tracks. + var evalDataset = OISTBeeVideo()! + evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] } + let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset) + let eval = trackerEvaluationDataset.evaluate( + tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: "rae") + print(eval.trackerMetrics.accuracy) + print(eval.trackerMetrics.robustness) + } + + func run() { + if verbose { + print("Loading dataset...") + } + + startTimer("DATASET_LOAD") + let dataset: OISTBeeVideo = OISTBeeVideo(deferLoadingFrames: true)! + stopTimer("DATASET_LOAD") + + if verbose { + print("Tracking...") + } + + startTimer("RAE_TRACKING") + naiveRaeTrack(dataset: dataset) + stopTimer("RAE_TRACKING") + + if verbose { + printTimers() + } + } +} + // It is important to set the global threadpool before doing anything else, so that nothing // accidentally uses the default threadpool. ComputeThreadPools.global = NonBlockingThreadPool(name: "mypool", threadCount: 12) BeeTrackingTool.main() + + +/// Returns a tracking configuration for a tracker using an RAE. +/// +/// Parameter model: The RAE model to use. +/// Parameter statistics: Normalization statistics for the frames. +/// Parameter frames: The frames of the video where we want to run tracking. +/// Parameter targetSize: The size of the target in the frames. +public func makeNaiveBayesRAETracker( + model: DenseRAE, + statistics: FrameStatistics, + frames: [Tensor], + targetSize: (Int, Int), + foregroundModel: MultivariateGaussian, + backgroundModel: GaussianNB +) -> TrackingConfiguration> { + var variableTemplate = VariableAssignments() + var frameVariableIDs = [Tuple1>]() + for _ in 0.. () in + let (poseID) = unpack(variables) + let (pose) = unpack(values) + graph.store(WeightedPriorFactorPose2(poseID, pose, weight: 1e0, rotWeight: 1e2)) + }, + addTrackingFactor: { (variables, frame, graph) -> () in + let (poseID) = unpack(variables) + graph.store( + ProbablisticTrackingFactor(poseID, + measurement: statistics.normalized(frame), + encoder: model, + patchSize: targetSize, + appearanceModelSize: targetSize, + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e1 + ) + ) + }, + addBetweenFactor: { (variables1, variables2, graph) -> () in + let (poseID1) = unpack(variables1) + let (poseID2) = unpack(variables2) + graph.store(WeightedBetweenFactorPose2SD(poseID1, poseID2, Pose2(), sdX: 8, sdY: 4.6, sdTheta: 0.3)) + }) +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift index 240d6c39..c60bf905 100644 --- a/Examples/OISTVisualizationTool/main.swift +++ b/Examples/OISTVisualizationTool/main.swift @@ -24,7 +24,7 @@ import Foundation struct OISTVisualizationTool: ParsableCommand { static var configuration = CommandConfiguration( - subcommands: [VisualizeTrack.self, ViewFrame.self, RawTrack.self, PpcaTrack.self, NaiveRae.self, TrainRAE.self, NaivePca.self]) + subcommands: [VisualizePrediction.self, VisualizeTrack.self, ViewFrame.self, RawTrack.self, PpcaTrack.self, NaiveRae.self, TrainRAE.self, NaivePca.self]) } /// View a frame with bounding boxes @@ -510,26 +510,14 @@ struct TrainRAE: ParsableCommand { /// /// Tracking with a Naive Bayes with RAE struct NaivePca: ParsableCommand { - @Option(help: "Where to load the RAE weights") - var loadWeights: String = "./oist_rae_weight.npy" - - @Option(help: "Which bounding box to track") - var boxId: Int = 0 - - @Option(help: "Track for how many frames") - var trackFrames: Int = 10 - - @Option(help: "Track the target from frame x") - var trackStartFrame: Int = 250 - @Option(help: "The dimension of the latent code in the RAE appearance model") - var kLatentDimension = 10 + var kLatentDimension = 20 @Flag(help: "Print progress information") var verbose: Bool = false /// Returns predictions for `videoName` using the raw pixel tracker. - func naivePpcaTrack(dataset dataset_: OISTBeeVideo, length: Int, startFrom: Int) -> [OrientedBoundingBox] { + func naivePpcaTrack(dataset dataset_: OISTBeeVideo) { var dataset = dataset_ dataset.labels = dataset.labels.map { $0.filter({ $0.label == .Body }) @@ -573,48 +561,32 @@ struct NaivePca: ParsableCommand { print("Background: \(backgroundModel)") } - if verbose { print("Loading video frames...") } - startTimer("VIDEO_LOAD") - // Load the video and take a slice of it. - let videos = (0.. Tensor in - return withDevice(.cpu) { dataset.loadFrame(dataset.frameIds[startFrom + i])! } - } - stopTimer("VIDEO_LOAD") - - let startPose = dataset.labels[startFrom][boxId].location.center - - if verbose { - print("Creating tracker, startPose = \(startPose)") - } - - startTimer("MAKE_GRAPH") - var tracker = makeNaiveBayesPCATracker( - model: ppca, - statistics: statistics, - frames: videos, - targetSize: (dataset.labels[startFrom][boxId].location.rows, dataset.labels[startFrom][boxId].location.cols), - foregroundModel: foregroundModel, backgroundModel: backgroundModel - ) - stopTimer("MAKE_GRAPH") - - if verbose { print("Starting Optimization...") } - if verbose { tracker.optimizer.verbosity = .SUMMARY } - - tracker.optimizer.cgls_precision = 1e-7 - tracker.optimizer.precision = 1e-4 - tracker.optimizer.max_iteration = 200 - - startTimer("GRAPH_INFER") - let prediction = tracker.infer(knownStart: Tuple1(startPose)) - stopTimer("GRAPH_INFER") - - let boxes = tracker.frameVariableIDs.map { frameVariableIDs -> OrientedBoundingBox in - let poseID = frameVariableIDs.head - return OrientedBoundingBox( - center: prediction[poseID], rows: dataset.labels[startFrom][boxId].location.rows, cols: dataset.labels[startFrom][boxId].location.cols) + func tracker(_ frames: [Tensor], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] { + var tracker = makeNaiveBayesPCATracker( + model: ppca, + statistics: statistics, + frames: frames, + targetSize: (start.rows, start.cols), + foregroundModel: foregroundModel, backgroundModel: backgroundModel + ) + tracker.optimizer.cgls_precision = 1e-9 + tracker.optimizer.precision = 1e-6 + tracker.optimizer.max_iteration = 200 + let prediction = tracker.infer(knownStart: Tuple1(start.center)) + return tracker.frameVariableIDs.map { varIds in + let poseId = varIds.head + return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols) + } } - return boxes + // Only do inference on the interesting tracks. + var evalDataset = OISTBeeVideo()! + evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] } + let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset) + let eval = trackerEvaluationDataset.evaluate( + tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: "rae") + print(eval.trackerMetrics.accuracy) + print(eval.trackerMetrics.robustness) } func run() { @@ -631,25 +603,8 @@ struct NaivePca: ParsableCommand { } startTimer("PPCA_TRACKING") - var bboxes: [OrientedBoundingBox] - bboxes = naivePpcaTrack(dataset: dataset, length: trackFrames, startFrom: trackStartFrame) + naivePpcaTrack(dataset: dataset) stopTimer("PPCA_TRACKING") - - let frameRawId = dataset.frameIds[trackStartFrame + trackFrames] - let image = dataset.loadFrame(frameRawId)! - - if verbose { - print("Creating output plot") - } - startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() - stopTimer("PLOTTING") - - if verbose { - printTimers() - } } } @@ -666,6 +621,32 @@ struct VisualizeTrack: ParsableCommand { } } +struct VisualizePrediction: ParsableCommand { + @Option + var prediction: String + + @Option + var subsequenceIndex: Int = 0 + + // TODO: I think I should save this in the prediction so that we do not need to specify it! + @Option + var startFrame: Int + + @Option + var output: String + + func run() { + let dataset = OISTBeeVideo(deferLoadingFrames: true)! + let decoder = JSONDecoder() + let data = try! Data(contentsOf: URL(fileURLWithPath: prediction)) + let sequence = try! decoder.decode(SequenceEvaluationResults.self, from: data) + + let track = OISTBeeTrack( + startFrameIndex: startFrame, boxes: sequence.subsequences[subsequenceIndex].prediction) + track.render(to: output, video: dataset) + } +} + // It is important to set the global threadpool before doing anything else, so that nothing // accidentally uses the default threadpool. ComputeThreadPools.global = diff --git a/Sources/BeeDataset/OISTBeeVideo.swift b/Sources/BeeDataset/OISTBeeVideo.swift index 88045d4b..b73c9a21 100644 --- a/Sources/BeeDataset/OISTBeeVideo.swift +++ b/Sources/BeeDataset/OISTBeeVideo.swift @@ -102,6 +102,11 @@ public struct OISTBeeTrack { /// The positions of the bee at each frame. public var boxes: [OrientedBoundingBox] + + public init(startFrameIndex: Int, boxes: [OrientedBoundingBox]) { + self.startFrameIndex = startFrameIndex + self.boxes = boxes + } } /// For output integers with padding diff --git a/Sources/BeeTracking/OISTBeeVideo+Batches.swift b/Sources/BeeTracking/OISTBeeVideo+Batches.swift index d54681a3..8828b2b0 100644 --- a/Sources/BeeTracking/OISTBeeVideo+Batches.swift +++ b/Sources/BeeTracking/OISTBeeVideo+Batches.swift @@ -66,12 +66,12 @@ extension OISTBeeVideo { Double.random(in: Double(maxSide).., _ endId: TypedID, _ difference: Pose, sdX: Double, sdY: Double, sdTheta: Double) { + self.edges = Tuple2(startId, endId) + self.difference = difference + self.sdX = sdX + self.sdY = sdY + self.sdTheta = sdTheta + } + + @differentiable + public func errorVector(_ start: Pose, _ end: Pose) -> Pose.TangentVector { + let actualMotion = between(start, end) + let local = difference.localCoordinate(actualMotion) + return Vector3(local.x / sdTheta, local.y / sdX, local.z / sdY) + } +} + +public struct WeightedPriorFactorPose2: LinearizableFactor1 { + public typealias Pose = Pose2 + public let edges: Variables.Indices + public let prior: Pose + public let weight: Double + public let rotWeight: Double + + public init(_ startId: TypedID, _ prior: Pose, weight: Double, rotWeight: Double = 1.0) { + self.edges = Tuple1(startId) + self.prior = prior + self.weight = weight + self.rotWeight = rotWeight + } + + @differentiable + public func errorVector(_ start: Pose) -> Pose.TangentVector { + let weighted = weight * prior.localCoordinate(start) + return Vector3(rotWeight * weighted.x, weighted.y, weighted.z) + } +} + + /// A specification for a factor graph that tracks a target in a sequence of frames. public struct TrackingConfiguration { /// The frames of the video to track. @@ -159,6 +206,21 @@ public struct TrackingConfiguration { // near its current position. addPriorFactor(frameVariableIDs[i], x[frameVariableIDs[i]], &g) + let currentVarID = (frameVariableIDs[i + 1] as! Tuple1>).head + let initialPose = x[currentVarID] + var bestPose = x[currentVarID] + var bestError = g.error(at: x) + for dtheta in stride(from: -.pi / 2 as Double, to: .pi / 2, by: .pi / 10) { + let candidatePose = Pose2(Rot2(dtheta) * initialPose.rot, initialPose.t) + x[currentVarID] = candidatePose + let candidateError = g.error(at: x) + if candidateError < bestError { + bestError = candidateError + bestPose = candidatePose + } + } + x[currentVarID] = bestPose + // Optimize the factor graph. try? optimizer.optimize(graph: g, initial: &x) } diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift index 1916f9f5..6efd0a7b 100644 --- a/Sources/BeeTracking/TrackingMetrics.swift +++ b/Sources/BeeTracking/TrackingMetrics.swift @@ -1,9 +1,6 @@ import BeeDataset -<<<<<<< HEAD -======= import Foundation import PenguinParallelWithFoundation ->>>>>>> bb5b886 (make the evaluation data codable) import PythonKit import SwiftFusion import TensorFlow @@ -180,13 +177,8 @@ extension TrackerEvaluationDataset { ) -> TrackerEvaluationResults { let sequenceEvaluations = sequences.prefix(sequenceCount).enumerated().map { (i, sequence) -> SequenceEvaluationResults in -<<<<<<< HEAD - print("Evaluating sequence \(i + 1) of \(sequences.count)") - return sequence.evaluate(tracker) -======= print("Evaluating sequence \(i + 1) of \(sequenceCount)") return sequence.evaluate(tracker, deltaAnchor: deltaAnchor, outputFile: "\(outputFile)-sequence\(i)") ->>>>>>> bb5b886 (make the evaluation data codable) } let result = TrackerEvaluationResults( sequences: sequenceEvaluations, @@ -221,14 +213,6 @@ public struct TrackerEvaluationSequence { extension TrackerEvaluationSequence { /// Returns the performance of `tracker` on the sequence `self`. -<<<<<<< HEAD - public func evaluate(_ tracker: Tracker) -> SequenceEvaluationResults { - let subsequences = self.subsequences(deltaAnchor: 50) - let subsequenceEvaluations = subsequences.enumerated().map { - (i, subsequence) -> (metrics: SubsequenceMetrics, prediction: [OrientedBoundingBox]) in - print("Evaluating subsequence \(i + 1) of \(subsequences.count)") - return subsequence.evaluateSubsequence(tracker) -======= public func evaluate(_ tracker: Tracker, deltaAnchor: Int, outputFile: String) -> SequenceEvaluationResults { guard let _ = try? Python.attemptImport("shapely") else { print("python shapely library must be installed") @@ -258,7 +242,6 @@ extension TrackerEvaluationSequence { SubsequenceEvaluationResults( metrics: SubsequenceMetrics(groundTruth: $0.0.groundTruth, prediction: $0.1), prediction: $0.1) ->>>>>>> bb5b886 (make the evaluation data codable) } let result = SequenceEvaluationResults( subsequences: subsequenceEvaluations, diff --git a/Sources/SwiftFusion/Core/Timer.swift b/Sources/SwiftFusion/Core/Timer.swift index ee6c5e72..1700c931 100644 --- a/Sources/SwiftFusion/Core/Timer.swift +++ b/Sources/SwiftFusion/Core/Timer.swift @@ -11,37 +11,37 @@ fileprivate var accumulatedTimers: [String: UInt64] = [:] /// /// Precondition: The `name` timer is not running. public func startTimer(_ name: String) { - guard startedTimers[name] == nil else { preconditionFailure("timer \(name) is already started") } - startedTimers[name] = DispatchTime.now().uptimeNanoseconds +// guard startedTimers[name] == nil else { preconditionFailure("timer \(name) is already started") } +// startedTimers[name] = DispatchTime.now().uptimeNanoseconds } /// Stop the `name` timer. /// /// Precondition: The `name` timer is running. public func stopTimer(_ name: String) { - guard let start = startedTimers[name] else { preconditionFailure("timer \(name) is not running") } - startedTimers[name] = nil - accumulatedTimers[name, default: 0] += DispatchTime.now().uptimeNanoseconds - start +// guard let start = startedTimers[name] else { preconditionFailure("timer \(name) is not running") } +// startedTimers[name] = nil +// accumulatedTimers[name, default: 0] += DispatchTime.now().uptimeNanoseconds - start } /// Print the total times accumulated for each timer. public func printTimers() { - guard startedTimers.count == 0 else { preconditionFailure("timers are still running: \(startedTimers)") } - for (name, duration) in accumulatedTimers { - print("\(name): \(Double(duration) / 1e9) seconds") - } +// guard startedTimers.count == 0 else { preconditionFailure("timers are still running: \(startedTimers)") } +// for (name, duration) in accumulatedTimers { +// print("\(name): \(Double(duration) / 1e9) seconds") +// } } fileprivate var counters: [String: Int] = [:] /// Increment the `name` counter. public func incrementCounter(_ name: String) { - counters[name, default: 0] += 1 +// counters[name, default: 0] += 1 } /// Print the total counts for each counter. public func printCounters() { - for (name, count) in counters { - print("\(name): \(count)") - } +// for (name, count) in counters { +// print("\(name): \(count)") +// } } diff --git a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift index 585948e4..528cfd65 100644 --- a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift +++ b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift @@ -85,6 +85,6 @@ public struct ProbablisticTrackingFactor< } /// TODO: What is the idiomatic way of avoiding negative probability here? - return Vector1(result) + return Vector1(sqrtWrap(result)) } } From 2bdf9b5dcd97fc0d7c5d06e4093099eb838be075 Mon Sep 17 00:00:00 2001 From: Marc Rasi Date: Fri, 13 Nov 2020 16:24:27 -0800 Subject: [PATCH 03/10] add random projections --- Examples/OISTVisualizationTool/main.swift | 19 ++++++++++++++++--- Sources/SwiftFusion/Inference/PPCA.swift | 6 +++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift index c60bf905..fc385361 100644 --- a/Examples/OISTVisualizationTool/main.swift +++ b/Examples/OISTVisualizationTool/main.swift @@ -516,6 +516,15 @@ struct NaivePca: ParsableCommand { @Flag(help: "Print progress information") var verbose: Bool = false + @Flag(help: "Use random projections instead of learned PPCA vectors") + var randomProjections: Bool = false + + @Option + var outputFile: String + + @Option + var truncate: Int = 50 + /// Returns predictions for `videoName` using the raw pixel tracker. func naivePpcaTrack(dataset dataset_: OISTBeeVideo) { var dataset = dataset_ @@ -537,7 +546,11 @@ struct NaivePca: ParsableCommand { var ppca = PPCA(latentSize: kLatentDimension) ppca.train(images: batch) - + + if randomProjections { + ppca.W_inv = Tensor(randomNormal: ppca.W_inv!.shape) + } + if verbose { print("Fitting Naive Bayes model") } var (foregroundModel, backgroundModel) = ( @@ -580,11 +593,11 @@ struct NaivePca: ParsableCommand { } // Only do inference on the interesting tracks. - var evalDataset = OISTBeeVideo()! + var evalDataset = OISTBeeVideo(truncate: truncate)! evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] } let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset) let eval = trackerEvaluationDataset.evaluate( - tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: "rae") + tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: outputFile) print(eval.trackerMetrics.accuracy) print(eval.trackerMetrics.robustness) } diff --git a/Sources/SwiftFusion/Inference/PPCA.swift b/Sources/SwiftFusion/Inference/PPCA.swift index 4b5740c0..3f7c1bf5 100644 --- a/Sources/SwiftFusion/Inference/PPCA.swift +++ b/Sources/SwiftFusion/Inference/PPCA.swift @@ -105,7 +105,7 @@ public struct PPCA { public func encode(_ image: Patch) -> Tensor { precondition(image.rank == 3 || (image.rank == 4), "wrong latent dimension \(image.shape)") let (H_, W_, C_) = (W.shape[0], W.shape[1], W.shape[2]) - if image.rank == 4 { + if slowPath(image) { let v_T = (image - mu).reshaped(to: [H_ * W_ * C_, image.shape[0]]).transposed() return matmul(v_T, W_inv!.transposed()).reshaped(to: [image.shape[0], latent_size]) } else { @@ -113,6 +113,10 @@ public struct PPCA { } } + private func slowPath(_ image: Tensor) -> Bool { + image.rank == 4 && image.shape[0] > 1 + } + /// Generate an image and corresponding Jacobian according to a latent /// Input: [latent_size] or [latent_size, 1] public func decodeWithJacobian(_ latent: Tensor) -> (Patch, Patch.TangentVector) { From 66e003e9a87f199b2d460581d5f5c646f58c8ac4 Mon Sep 17 00:00:00 2001 From: Marc Rasi Date: Fri, 13 Nov 2020 16:27:35 -0800 Subject: [PATCH 04/10] fix ppca encode --- Sources/SwiftFusion/Inference/PPCA.swift | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Sources/SwiftFusion/Inference/PPCA.swift b/Sources/SwiftFusion/Inference/PPCA.swift index 3f7c1bf5..ca2b1da8 100644 --- a/Sources/SwiftFusion/Inference/PPCA.swift +++ b/Sources/SwiftFusion/Inference/PPCA.swift @@ -105,18 +105,18 @@ public struct PPCA { public func encode(_ image: Patch) -> Tensor { precondition(image.rank == 3 || (image.rank == 4), "wrong latent dimension \(image.shape)") let (H_, W_, C_) = (W.shape[0], W.shape[1], W.shape[2]) - if slowPath(image) { - let v_T = (image - mu).reshaped(to: [H_ * W_ * C_, image.shape[0]]).transposed() - return matmul(v_T, W_inv!.transposed()).reshaped(to: [image.shape[0], latent_size]) + if image.rank == 4 { + if image.shape[0] == 1 { + return matmul(W_inv!, (image - mu).reshaped(to: [H_ * W_ * C_, 1])).reshaped(to: [1, latent_size]) + } else { + let v_T = (image - mu).reshaped(to: [H_ * W_ * C_, image.shape[0]]).transposed() + return matmul(v_T, W_inv!.transposed()).reshaped(to: [image.shape[0], latent_size]) + } } else { return matmul(W_inv!, (image - mu).reshaped(to: [H_ * W_ * C_, 1])).reshaped(to: [latent_size]) } } - private func slowPath(_ image: Tensor) -> Bool { - image.rank == 4 && image.shape[0] > 1 - } - /// Generate an image and corresponding Jacobian according to a latent /// Input: [latent_size] or [latent_size, 1] public func decodeWithJacobian(_ latent: Tensor) -> (Patch, Patch.TangentVector) { From 96b5a107045c53b950f23b650702b18e75a62d92 Mon Sep 17 00:00:00 2001 From: Marc Rasi Date: Fri, 13 Nov 2020 17:01:53 -0800 Subject: [PATCH 05/10] fix possible divide by zero --- Sources/BeeTracking/TrackingMetrics.swift | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift index 6efd0a7b..4c5857dc 100644 --- a/Sources/BeeTracking/TrackingMetrics.swift +++ b/Sources/BeeTracking/TrackingMetrics.swift @@ -54,8 +54,8 @@ public struct SubsequenceMetrics: Codable { self.averageOverlap = averageOverlap self.NFsa = NFsa self.Nsa = overlaps.count - self.accuracy = overlaps[0.. Date: Fri, 13 Nov 2020 20:16:07 -0800 Subject: [PATCH 06/10] make it sample from the motion model before doing lm --- Examples/BeeTrackingTool/main.swift | 5 ++++- Sources/BeeTracking/TrackingFactorGraph.swift | 12 +++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index 3d5a2683..727ef604 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -199,6 +199,9 @@ struct NaiveRae: ParsableCommand { @Flag var verbose: Bool = false + @Option + var outputFile: String + /// Returns predictions for `videoName` using the raw pixel tracker. func naiveRaeTrack(dataset dataset_: OISTBeeVideo) { var dataset = dataset_ @@ -276,7 +279,7 @@ struct NaiveRae: ParsableCommand { evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] } let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset) let eval = trackerEvaluationDataset.evaluate( - tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: "rae") + tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: outputFile) print(eval.trackerMetrics.accuracy) print(eval.trackerMetrics.robustness) } diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift index 77fdfd84..c516b197 100644 --- a/Sources/BeeTracking/TrackingFactorGraph.swift +++ b/Sources/BeeTracking/TrackingFactorGraph.swift @@ -192,9 +192,7 @@ public struct TrackingConfiguration { // Initialize the variables one frame at a time. Each iteration intializes the `i+1`-th // variable. for i in 0..<(frames.count - 1) { - if i % 10 == 0 { - print("Inferring for frame \(i + 1) of \(frames.count - 1)") - } + print("Inferring for frame \(i + 1) of \(frames.count - 1)") // Set the initial guess of the `i+1`-th variable to the value of the previous variable. x[frameVariableIDs[i + 1]] = x[frameVariableIDs[i], as: FrameVariables.self] @@ -210,8 +208,12 @@ public struct TrackingConfiguration { let initialPose = x[currentVarID] var bestPose = x[currentVarID] var bestError = g.error(at: x) - for dtheta in stride(from: -.pi / 2 as Double, to: .pi / 2, by: .pi / 10) { - let candidatePose = Pose2(Rot2(dtheta) * initialPose.rot, initialPose.t) + for _ in 0..<1000 { + let noise = Tensor(randomNormal: [3]).scalars + let candidatePose = initialPose.retract(Vector3( + 0.3 * noise[0], + 8 * noise[1], + 4.6 * noise[2])) x[currentVarID] = candidatePose let candidateError = g.error(at: x) if candidateError < bestError { From 2eef598e970b1e4896020d6c7e22b2a506aee952 Mon Sep 17 00:00:00 2001 From: Marc Rasi Date: Fri, 13 Nov 2020 20:21:42 -0800 Subject: [PATCH 07/10] add truncate option --- Examples/BeeTrackingTool/main.swift | 5 ++++- Examples/OISTVisualizationTool/main.swift | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index 727ef604..42285f34 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -202,6 +202,9 @@ struct NaiveRae: ParsableCommand { @Option var outputFile: String + @Option + var truncate: Int + /// Returns predictions for `videoName` using the raw pixel tracker. func naiveRaeTrack(dataset dataset_: OISTBeeVideo) { var dataset = dataset_ @@ -275,7 +278,7 @@ struct NaiveRae: ParsableCommand { } // Only do inference on the interesting tracks. - var evalDataset = OISTBeeVideo()! + var evalDataset = OISTBeeVideo(truncate: truncate)! evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] } let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset) let eval = trackerEvaluationDataset.evaluate( diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift index fc385361..222f4739 100644 --- a/Examples/OISTVisualizationTool/main.swift +++ b/Examples/OISTVisualizationTool/main.swift @@ -523,7 +523,7 @@ struct NaivePca: ParsableCommand { var outputFile: String @Option - var truncate: Int = 50 + var truncate: Int /// Returns predictions for `videoName` using the raw pixel tracker. func naivePpcaTrack(dataset dataset_: OISTBeeVideo) { From 59f381ff73198327e076646f015f68246c19b06f Mon Sep 17 00:00:00 2001 From: Marc Rasi Date: Sat, 14 Nov 2020 19:33:26 -0800 Subject: [PATCH 08/10] more work --- Examples/BeeTrackingTool/main.swift | 9 ++- Sources/BeeTracking/TrackingFactorGraph.swift | 65 +++++++++++++++---- 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index 42285f34..f9b6e517 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -267,8 +267,8 @@ struct NaiveRae: ParsableCommand { targetSize: (start.rows, start.cols), foregroundModel: foregroundModel, backgroundModel: backgroundModel ) - tracker.optimizer.cgls_precision = 1e-9 - tracker.optimizer.precision = 1e-6 + tracker.optimizer.cgls_precision = 1e-5 + tracker.optimizer.precision = 1e-3 tracker.optimizer.max_iteration = 200 let prediction = tracker.infer(knownStart: Tuple1(start.center)) return tracker.frameVariableIDs.map { varIds in @@ -367,6 +367,11 @@ public func makeNaiveBayesRAETracker( let (poseID1) = unpack(variables1) let (poseID2) = unpack(variables2) graph.store(WeightedBetweenFactorPose2SD(poseID1, poseID2, Pose2(), sdX: 8, sdY: 4.6, sdTheta: 0.3)) + }, + addFixedBetweenFactor: { (values, variables, graph) -> () in + let (prior) = unpack(values) + let (poseID) = unpack(variables) + graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 4.6, sdTheta: 0.3)) }) } diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift index c516b197..f316142c 100644 --- a/Sources/BeeTracking/TrackingFactorGraph.swift +++ b/Sources/BeeTracking/TrackingFactorGraph.swift @@ -108,6 +108,28 @@ public struct WeightedPriorFactorPose2: LinearizableFactor1 { } } +public struct WeightedPriorFactorPose2SD: LinearizableFactor1 { + public typealias Pose = Pose2 + public let edges: Variables.Indices + public let prior: Pose + public let sdX: Double + public let sdY: Double + public let sdTheta: Double + + public init(_ startId: TypedID, _ prior: Pose, sdX: Double, sdY: Double, sdTheta: Double) { + self.edges = Tuple1(startId) + self.prior = prior + self.sdX = sdX + self.sdY = sdY + self.sdTheta = sdTheta + } + + @differentiable + public func errorVector(_ start: Pose) -> Pose.TangentVector { + let local = prior.localCoordinate(start) + return Vector3(local.x / sdTheta, local.y / sdX, local.z / sdY) + } +} /// A specification for a factor graph that tracks a target in a sequence of frames. public struct TrackingConfiguration { @@ -136,6 +158,16 @@ public struct TrackingConfiguration { _ graph: inout FactorGraph ) -> () + /// Adds to `graph` "between factor(s)" between `constantVariables` and `variables` that treat + /// the `constantVariables` as fixed. + /// + /// This is used during frame-by-frame initialization to constrain frame `i + 1` by a between + /// factor on the value from frame `i` without optimizing the value of frame `i`. + public let addFixedBetweenFactor: ( + _ values: FrameVariables, _ variables: FrameVariables.Indices, + _ graph: inout FactorGraph + ) -> () + /// The optimizer to use during inference. public var optimizer = LM() @@ -155,14 +187,25 @@ public struct TrackingConfiguration { addBetweenFactor: @escaping ( _ variables1: FrameVariables.Indices, _ variables2: FrameVariables.Indices, _ graph: inout FactorGraph - ) -> () + ) -> (), + addFixedBetweenFactor: (( + _ values: FrameVariables, _ variables: FrameVariables.Indices, + _ graph: inout FactorGraph + ) -> ())? = nil ) { + precondition( + addFixedBetweenFactor != nil, + "I added a runtime check for this argument so that I would not have to change all " + + "callers before compiling. It is actually required." + ) + self.frames = frames self.variableTemplate = variableTemplate self.frameVariableIDs = frameVariableIDs self.addPriorFactor = addPriorFactor self.addTrackingFactor = addTrackingFactor self.addBetweenFactor = addBetweenFactor + self.addFixedBetweenFactor = addFixedBetweenFactor! self.optimizer.precision = 1e-1 self.optimizer.max_iteration = 100 @@ -197,34 +240,32 @@ public struct TrackingConfiguration { // Set the initial guess of the `i+1`-th variable to the value of the previous variable. x[frameVariableIDs[i + 1]] = x[frameVariableIDs[i], as: FrameVariables.self] - // Create a tracking factor graph on just the `i`-tha dn `i+1`-th variables - var g = graph(on: i..<(i + 2)) + // Create a tracking factor graph on just the `i+1`-th variable. + var g = graph(on: (i + 1)..<(i + 2)) // The `i`-th variable is already initialized well, so add a prior factor that it stays // near its current position. - addPriorFactor(frameVariableIDs[i], x[frameVariableIDs[i]], &g) + addFixedBetweenFactor(x[frameVariableIDs[i]], frameVariableIDs[i + 1], &g) + let previousVarID = (frameVariableIDs[i] as! Tuple1>).head let currentVarID = (frameVariableIDs[i + 1] as! Tuple1>).head - let initialPose = x[currentVarID] + let previousPose = x[previousVarID] var bestPose = x[currentVarID] var bestError = g.error(at: x) - for _ in 0..<1000 { + for _ in 0..<5 { let noise = Tensor(randomNormal: [3]).scalars - let candidatePose = initialPose.retract(Vector3( + x[currentVarID] = previousPose.retract(Vector3( 0.3 * noise[0], 8 * noise[1], 4.6 * noise[2])) - x[currentVarID] = candidatePose + try? optimizer.optimize(graph: g, initial: &x) let candidateError = g.error(at: x) if candidateError < bestError { bestError = candidateError - bestPose = candidatePose + bestPose = x[currentVarID] } } x[currentVarID] = bestPose - - // Optimize the factor graph. - try? optimizer.optimize(graph: g, initial: &x) } // We could also do a final optimization on all the variables jointly here. From 8c6c3825e6b49a3e76ec13ef79cf36d1a1989e9f Mon Sep 17 00:00:00 2001 From: Frank dellaert Date: Sun, 15 Nov 2020 12:55:30 -0500 Subject: [PATCH 09/10] Ignore data directory --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cc25848d..dc85c39e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ xcuserdata/ /.swiftpm /.vscode -/.idea \ No newline at end of file +/.idea +/OIST_Data/ From 3633624833b97f661ae969e81bbbcbac1c5958c2 Mon Sep 17 00:00:00 2001 From: Frank dellaert Date: Sun, 15 Nov 2020 13:08:39 -0500 Subject: [PATCH 10/10] Moved AE likelihood tracker to its own file, removed from private Utils file --- Examples/BeeTrackingTool/main.swift | 69 +-------------- Examples/OISTVisualizationTool/Util.swift | 57 ------------- Examples/OISTVisualizationTool/main.swift | 2 +- Sources/BeeTracking/NaiveBayesAETracker.swift | 83 +++++++++++++++++++ 4 files changed, 85 insertions(+), 126 deletions(-) create mode 100644 Sources/BeeTracking/NaiveBayesAETracker.swift diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index e1275bf6..5949e197 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -262,7 +262,7 @@ struct NaiveRae: ParsableCommand { } func tracker(_ frames: [Tensor], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] { - var tracker = makeNaiveBayesRAETracker( + var tracker = makeNaiveBayesAETracker( model: rae, statistics: statistics, frames: frames, @@ -320,70 +320,3 @@ ComputeThreadPools.global = NonBlockingThreadPool(name: "mypool", threadCount: 12) BeeTrackingTool.main() - - -/// Returns a tracking configuration for a tracker using an RAE. -/// -/// Parameter model: The RAE model to use. -/// Parameter statistics: Normalization statistics for the frames. -/// Parameter frames: The frames of the video where we want to run tracking. -/// Parameter targetSize: The size of the target in the frames. -public func makeNaiveBayesRAETracker( - model: DenseRAE, - statistics: FrameStatistics, - frames: [Tensor], - targetSize: (Int, Int), - foregroundModel: MultivariateGaussian, - backgroundModel: GaussianNB -) -> TrackingConfiguration> { - var variableTemplate = VariableAssignments() - var frameVariableIDs = [Tuple1>]() - for _ in 0.. () in - let (poseID) = unpack(variables) - let (pose) = unpack(values) - graph.store(WeightedPriorFactorPose2(poseID, pose, weight: 1e0, rotWeight: 1e2)) - }, - addTrackingFactor: { (variables, frame, graph) -> () in - let (poseID) = unpack(variables) - graph.store( - ProbablisticTrackingFactor(poseID, - measurement: statistics.normalized(frame), - encoder: model, - patchSize: targetSize, - appearanceModelSize: targetSize, - foregroundModel: foregroundModel, - backgroundModel: backgroundModel, - maxPossibleNegativity: 1e1 - ) - ) - }, - addBetweenFactor: { (variables1, variables2, graph) -> () in - let (poseID1) = unpack(variables1) - let (poseID2) = unpack(variables2) - graph.store(WeightedBetweenFactorPose2SD(poseID1, poseID2, Pose2(), sdX: 8, sdY: 4.6, sdTheta: 0.3)) - }, - addFixedBetweenFactor: { (values, variables, graph) -> () in - let (prior) = unpack(values) - let (poseID) = unpack(variables) - graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 4.6, sdTheta: 0.3)) - }) -} - -/// Returns `t` as a Swift tuple. -fileprivate func unpack(_ t: Tuple2) -> (A, B) { - return (t.head, t.tail.head) -} -/// Returns `t` as a Swift tuple. -fileprivate func unpack(_ t: Tuple1) -> (A) { - return (t.head) -} diff --git a/Examples/OISTVisualizationTool/Util.swift b/Examples/OISTVisualizationTool/Util.swift index 729fa2c6..4b648551 100644 --- a/Examples/OISTVisualizationTool/Util.swift +++ b/Examples/OISTVisualizationTool/Util.swift @@ -43,63 +43,6 @@ public func makeOISTTrainingBatch(dataset: OISTBeeVideo, appearanceModelSize: (I } -/// Returns a tracking configuration for a tracker using an RAE. -/// -/// Parameter model: The RAE model to use. -/// Parameter statistics: Normalization statistics for the frames. -/// Parameter frames: The frames of the video where we want to run tracking. -/// Parameter targetSize: The size of the target in the frames. -public func makeNaiveBayesRAETracker( - model: DenseRAE, - statistics: FrameStatistics, - frames: [Tensor], - targetSize: (Int, Int), - foregroundModel: MultivariateGaussian, - backgroundModel: GaussianNB -) -> TrackingConfiguration> { - var variableTemplate = VariableAssignments() - var frameVariableIDs = [Tuple1>]() - for _ in 0.. () in - let (poseID) = unpack(variables) - let (pose) = unpack(values) - graph.store(WeightedPriorFactor(poseID, pose, weight: 1e-1)) - }, - addTrackingFactor: { (variables, frame, graph) -> () in - let (poseID) = unpack(variables) - graph.store( - ProbablisticTrackingFactor(poseID, - measurement: statistics.normalized(frame), - encoder: model, - patchSize: targetSize, - appearanceModelSize: targetSize, - foregroundModel: foregroundModel, - backgroundModel: backgroundModel, - maxPossibleNegativity: 1e1 - ) - ) - }, - addBetweenFactor: { (variables1, variables2, graph) -> () in - let (poseID1) = unpack(variables1) - let (poseID2) = unpack(variables2) - graph.store(WeightedBetweenFactorPose2(poseID1, poseID2, Pose2(), weight: 1e-1, rotWeight: 1e2)) - }), - addFixedBetweenFactor: { (values, variables, graph) -> () in - let (prior) = unpack(values) - let (poseID) = unpack(variables) - graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 4.6, sdTheta: 0.3)) - }) -} - /// Returns a tracking configuration for a tracker using PCA. /// /// Parameter model: The PCA model to use. diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift index 222f4739..a13c4279 100644 --- a/Examples/OISTVisualizationTool/main.swift +++ b/Examples/OISTVisualizationTool/main.swift @@ -358,7 +358,7 @@ struct NaiveRae: ParsableCommand { } startTimer("MAKE_GRAPH") - var tracker = makeNaiveBayesRAETracker( + var tracker = makeNaiveBayesAETracker( model: rae, statistics: statistics, frames: videos, diff --git a/Sources/BeeTracking/NaiveBayesAETracker.swift b/Sources/BeeTracking/NaiveBayesAETracker.swift new file mode 100644 index 00000000..dc727637 --- /dev/null +++ b/Sources/BeeTracking/NaiveBayesAETracker.swift @@ -0,0 +1,83 @@ +// Copyright 2020 The SwiftFusion Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import SwiftFusion +import TensorFlow +import PenguinStructures + +/// Returns a tracking configuration for a tracker using an RAE. +/// +/// Parameter model: The RAE model to use. +/// Parameter statistics: Normalization statistics for the frames. +/// Parameter frames: The frames of the video where we want to run tracking. +/// Parameter targetSize: The size of the target in the frames. +public func makeNaiveBayesAETracker( + model: DenseRAE, + statistics: FrameStatistics, + frames: [Tensor], + targetSize: (Int, Int), + foregroundModel: MultivariateGaussian, + backgroundModel: GaussianNB +) -> TrackingConfiguration> { + var variableTemplate = VariableAssignments() + var frameVariableIDs = [Tuple1>]() + for _ in 0.. () in + let (poseID) = unpack(variables) + let (pose) = unpack(values) + graph.store(WeightedPriorFactorPose2(poseID, pose, weight: 1e0, rotWeight: 1e2)) + }, + addTrackingFactor: { (variables, frame, graph) -> () in + let (poseID) = unpack(variables) + graph.store( + ProbablisticTrackingFactor(poseID, + measurement: statistics.normalized(frame), + encoder: model, + patchSize: targetSize, + appearanceModelSize: targetSize, + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e1 + ) + ) + }, + addBetweenFactor: { (variables1, variables2, graph) -> () in + let (poseID1) = unpack(variables1) + let (poseID2) = unpack(variables2) + graph.store(WeightedBetweenFactorPose2SD(poseID1, poseID2, Pose2(), sdX: 8, sdY: 4.6, sdTheta: 0.3)) + }, + addFixedBetweenFactor: { (values, variables, graph) -> () in + let (prior) = unpack(values) + let (poseID) = unpack(variables) + graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 4.6, sdTheta: 0.3)) + }) +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +}