diff --git a/repository/BioTools-Tests/BioLocusTest.class.st b/repository/BioTools-Tests/BioLocusTest.class.st new file mode 100644 index 00000000..b980a851 --- /dev/null +++ b/repository/BioTools-Tests/BioLocusTest.class.st @@ -0,0 +1,83 @@ +" +Tests for BioLocus interval and coordinate behavior. +" +Class { + #name : 'BioLocusTest', + #superclass : 'TestCase', + #category : 'BioTools-Tests', + #package : 'BioTools-Tests' +} + +{ #category : 'tests' } +BioLocusTest >> testContainsLocusAndUnion [ + "Containment, non-overlap, distance and union of loci on the same chromosome." + | outer inner disjoint union | + outer := BioLocus new + name: 'outer'; + chrLocation: 'chr2'; + from: 10 to: 100; + yourself. + inner := BioLocus new + name: 'inner'; + chrLocation: 'chr2'; + from: 25 to: 40; + yourself. + disjoint := BioLocus new + name: 'disjoint'; + chrLocation: 'chr2'; + from: 150 to: 200; + yourself. + + self assert: (outer containsLocus: inner). + self deny: (inner containsLocus: outer). + self deny: (outer overlaps: disjoint). + self assert: (outer distanceTo: disjoint) equals: 50. + + union := outer unionWith: disjoint. + self assert: union notNil. + self assert: union start equals: 10. + self assert: union end equals: 200 +] + +{ #category : 'tests' } +BioLocusTest >> testCoordinateConversionsAndNormalization [ + "Reversed bounds are reported invalid until normalizeBounds swaps them; then length and the BED/GFF pairs are checked." + | locus | + locus := BioLocus new + name: 'x'; + chrLocation: 'chrX'; + from: 30 to: 10; + yourself. + + self deny: locus isValidRange. + locus normalizeBounds. + self assert: locus isValidRange. + self assert: locus start equals: 10. + self assert: locus end equals: 30. + self assert: locus length equals: 21. + self assert: locus asBedInterval equals: #(9 30). + self assert: locus asGffInterval equals: #(10 30) +] + +{ #category : 'tests' } +BioLocusTest >> testOverlapIntersectionAndDistance [ + "Overlap detection, intersection bounds and zero distance for two overlapping loci." + | a b i | + a := BioLocus new + name: 'a'; + chrLocation: 'chr1'; + from: 100 to: 200; + yourself. + b := BioLocus new + name: 'b'; + chrLocation: 'chr1'; + from: 150 to: 250; + yourself. + + self assert: (a overlaps: b). 
+ i := a intersectionWith: b. + self assert: i notNil. + self assert: i start equals: 150. + self assert: i end equals: 200. + self assert: (a distanceTo: b) equals: 0 +] diff --git a/repository/BioTools/BioLocus.class.st b/repository/BioTools/BioLocus.class.st index c49eab57..728be187 100644 --- a/repository/BioTools/BioLocus.class.st +++ b/repository/BioTools/BioLocus.class.st @@ -31,21 +31,72 @@ Class { { #category : 'examples' } BioLocus class >> example01 [ - ^ self class new + ^ self new name: 'BM1818'; chrLocation: 'D23S21'; - beSimpleRepeat; yourself ] +{ #category : 'instance creation' } +BioLocus class >> newFrom: aStart to: anEnd chr: aChr [ + "Answer a new instance with start aStart, end anEnd and chromosome location aChr; length is derived from start and end." + ^ self new + start: aStart; + end: anEnd; + recomputeLength; + chrLocation: aChr; + yourself +] + +{ #category : 'comparing' } +BioLocus >> < aBioLocus [ + "Answer whether the receiver sorts before aBioLocus: by chromosome, then start, then end, then name. A nil chromosome or name compares as '' and a nil start or end as 0." + self chrLocation = aBioLocus chrLocation ifFalse: [ + ^ (self chrLocation ifNil: [ '' ]) < (aBioLocus chrLocation ifNil: [ '' ]) ]. + self start = aBioLocus start ifFalse: [ ^ (self start ifNil: [ 0 ]) < (aBioLocus start ifNil: [ 0 ]) ]. + self end = aBioLocus end ifFalse: [ ^ (self end ifNil: [ 0 ]) < (aBioLocus end ifNil: [ 0 ]) ]. + ^ (self name ifNil: [ '' ]) < (aBioLocus name ifNil: [ '' ]) +] + +{ #category : 'comparing' } +BioLocus >> = anObject [ + "Answer true when anObject has the same class, name, chromosome location, start, end and value as the receiver." + self == anObject ifTrue: [ ^ true ]. + (self class = anObject class) ifFalse: [ ^ false ]. + ^ self name = anObject name + and: [ self chrLocation = anObject chrLocation + and: [ self start = anObject start + and: [ self end = anObject end + and: [ self value = anObject value ] ] ] ] +] + +{ #category : 'converting' } +BioLocus >> asBedInterval [ + "Answer the receiver's range as a 0-based half-open pair suitable for BED-like exports. + Answers an Array {start0 . end0} where start0 is start minus one, or nil when start or end is missing." + self hasRange ifFalse: [ ^ nil ]. + ^ { self start - 1. 
self end } +] + +{ #category : 'converting' } +BioLocus >> asGffInterval [ + "Answer an Array {start . end} of 1-based closed coordinates, or nil when start or end is missing." + self hasRange ifFalse: [ ^ nil ]. + ^ { self start. self end } +] + { #category : 'converting' } BioLocus >> asString [ " Answer a representation of the receiver " - - ^ String streamContents: [ : stream | - stream nextPutAll: self name between: $[ and: $]. - self printRangeOn: stream ] + + ^ String + streamContents: [ :stream | + stream + nextPutAll: self name + between: $[ + and: $]. + self hasRange ifTrue: [ self printRangeOn: stream ] "same guard as printOn:, no range output when start or end is nil" ] ] { #category : 'accessing - public' } @@ -62,6 +113,54 @@ BioLocus >> chrLocation: aString [ chrLocation := aString ] +{ #category : 'testing' } +BioLocus >> containsLocus: aBioLocus [ + "Answer true when aBioLocus is fully contained in receiver. + Chromosome must match when both are defined; answers false when aBioLocus is nil or either range is missing." + + (aBioLocus isNil or: [ self hasRange not ]) ifTrue: [ ^ false ]. + aBioLocus hasRange ifFalse: [ ^ false ]. + ((self chrLocation notNil + and: [ aBioLocus chrLocation notNil ]) + and: [ self chrLocation ~= aBioLocus chrLocation ]) + ifTrue: [ ^ false ]. + ^ self start <= aBioLocus start and: [ self end >= aBioLocus end ] +] + +{ #category : 'testing' } +BioLocus >> containsPosition: aNumber [ + "Alias for rangeIncludes: for a clearer API." + ^ self rangeIncludes: aNumber +] + +{ #category : 'copying' } +BioLocus >> copyFrom: aBioLocus [ + "Copy name, chromosome location, start, end, length, position and value from aBioLocus; answer the receiver. NOTE(review): Object already defines copyFrom: in Pharo; confirm this override is intended." + self name: aBioLocus name. + self chrLocation: aBioLocus chrLocation. + self start: aBioLocus start. + self end: aBioLocus end. + self length: aBioLocus length. + self position: aBioLocus position. + self value: aBioLocus value. + ^ self +] + +{ #category : 'comparing' } +BioLocus >> distanceTo: aBioLocus [ + "Answer the gap to aBioLocus, computed as the later start minus the earlier end (so directly adjacent loci answer 1). + 0 when overlapping; nil when not comparable (missing range or chromosome mismatch)." 
+ + (self hasRange not or: [ aBioLocus isNil or: [ aBioLocus hasRange not ] ]) + ifTrue: [ ^ nil ]. + ((self chrLocation notNil and: [ aBioLocus chrLocation notNil ]) + and: [ self chrLocation ~= aBioLocus chrLocation ]) + ifTrue: [ ^ nil ]. + (self overlaps: aBioLocus) ifTrue: [ ^ 0 ]. + self end < aBioLocus start ifTrue: [ ^ aBioLocus start - self end ]. + ^ self start - aBioLocus end +] + { #category : 'accessing' } BioLocus >> end [ " Answer a of the receiver's end " @@ -74,6 +173,65 @@ BioLocus >> end: aNumber [ end := aNumber ] +{ #category : 'accessing' } +BioLocus >> from: aStart length: aLength [ + "Set start and length and derive end so the range is inclusive; end becomes nil when either argument is nil. Answer the receiver." + self start: aStart. + self length: aLength. + end := (aStart isNil or: [ aLength isNil ]) + ifTrue: [ nil ] + ifFalse: [ aStart + aLength - 1 ]. + ^ self +] + +{ #category : 'accessing' } +BioLocus >> from: aStart to: anEnd [ + "Set start/end and derive length using inclusive coordinates. Bounds are stored as given, even when reversed. Answer the receiver." + self start: aStart. + self end: anEnd. + self recomputeLength. + ^ self +] + +{ #category : 'testing' } +BioLocus >> hasRange [ + "Answer whether receiver has both start and end coordinates." + ^ self start notNil and: [ self end notNil ] +] + +{ #category : 'comparing' } +BioLocus >> hash [ + "Answer a hash over the same fields that = compares, so equal loci hash alike." + ^ { self class. self name. self chrLocation. self start. self end. self value } hash +] + +{ #category : 'operations' } +BioLocus >> intersectionWith: aBioLocus [ + "Answer a new BioLocus covering the overlap of receiver and aBioLocus, or nil when they do not overlap. Name, chromosome and value come from the receiver, falling back to aBioLocus when nil." + | newStart newEnd | + (self overlaps: aBioLocus) ifFalse: [ ^ nil ]. + newStart := self start max: aBioLocus start. + newEnd := self end min: aBioLocus end. 
+ ^ self class new + name: (self name ifNil: [ aBioLocus name ]); + chrLocation: (self chrLocation ifNil: [ aBioLocus chrLocation ]); + value: (self value ifNil: [ aBioLocus value ]); + from: newStart to: newEnd; + yourself +] + +{ #category : 'testing' } +BioLocus >> isPoint [ + "Answer whether receiver represents a single-position locus (start equals end)." + ^ self hasRange and: [ self start = self end ] +] + +{ #category : 'testing' } +BioLocus >> isValidRange [ + "Answer whether receiver range is well-formed (both bounds present and start <= end)." + ^ self hasRange and: [ self start <= self end ] +] + { #category : 'accessing' } BioLocus >> length [ " Answer a of the length of this range " @@ -88,6 +246,28 @@ BioLocus >> length: aNumber [ length := aNumber ] +{ #category : 'private' } +BioLocus >> normalizeBounds [ + "Ensure start <= end by swapping when needed, then recompute length. Answer the receiver." + (start notNil and: [ end notNil and: [ start > end ] ]) ifTrue: [ + | tmp | + tmp := start. + start := end. + end := tmp ]. + self recomputeLength. + ^ self +] + +{ #category : 'testing' } +BioLocus >> overlaps: aBioLocus [ + "Answer true if receiver and aBioLocus share at least one position (bounds are inclusive). + Chromosome must match when both are defined; answers false when aBioLocus is nil or either range is missing." + (aBioLocus isNil or: [ self hasRange not or: [ aBioLocus hasRange not ] ]) ifTrue: [ ^ false ]. + ((self chrLocation notNil and: [ aBioLocus chrLocation notNil ]) + and: [ self chrLocation ~= aBioLocus chrLocation ]) ifTrue: [ ^ false ]. + ^ self start <= aBioLocus end and: [ aBioLocus start <= self end ] +] + { #category : 'accessing' } BioLocus >> position [ ^ position @@ -136,11 +316,11 @@ BioLocus >> printNameOn: aStream [ { #category : 'printing' } BioLocus >> printOn: aStream [ - " Private - See superimplementor's comment " + "Private - See superimplementor's comment" super printOn: aStream. self printNameOn: aStream. - self printRangeOn: aStream. + self hasRange ifTrue: [ self printRangeOn: aStream ]. self printChrOn: aStream. self printLengthOn: aStream. 
self printValueOn: aStream @@ -187,6 +367,25 @@ BioLocus >> rangeIncludes: posNumber chrLocation: chrNumber [ ^ (self start notNil and: [ self end notNil ]) and: [ self chrLocation = chrNumber and: [ self rangeIncludes: posNumber ] ] ] +{ #category : 'private' } +BioLocus >> recomputeLength [ + "Set length to the inclusive span from start to end, or to nil when either bound is nil. Answer the new length." + (start isNil or: [ end isNil ]) + ifTrue: [ length := nil ] + ifFalse: [ length := (end - start) + 1 ]. + ^ length +] + +{ #category : 'operations' } +BioLocus >> shiftBy: anInteger [ + "Shift start and end (when both are set) and position (when set) by anInteger bases. Answer the receiver." + self hasRange ifTrue: [ + self start: self start + anInteger. + self end: self end + anInteger ]. + position notNil ifTrue: [ position := position + anInteger ]. + ^ self +] + { #category : 'accessing' } BioLocus >> start [ " Answer a of the receiver's start " @@ -200,6 +399,23 @@ BioLocus >> start: aNumber [ start := aNumber ] +{ #category : 'operations' } +BioLocus >> unionWith: aBioLocus [ + "Answer a new BioLocus spanning from the smaller start to the larger end of receiver and aBioLocus, including any gap between them. + Answers nil when either range is missing or when both chromosomes are defined and differ; value is not copied." + | newStart newEnd | + (self hasRange not or: [ aBioLocus isNil or: [ aBioLocus hasRange not ] ]) ifTrue: [ ^ nil ]. + ((self chrLocation notNil and: [ aBioLocus chrLocation notNil ]) + and: [ self chrLocation ~= aBioLocus chrLocation ]) ifTrue: [ ^ nil ]. + newStart := self start min: aBioLocus start. + newEnd := self end max: aBioLocus end. + ^ self class new + name: (self name ifNil: [ aBioLocus name ]); + chrLocation: (self chrLocation ifNil: [ aBioLocus chrLocation ]); + from: newStart to: newEnd; + yourself +] + { #category : 'accessing' } BioLocus >> value [ ^ value