From 0f6a542491ee198205771c13e11722f110b61fc6 Mon Sep 17 00:00:00 2001 From: Hernan Morales Date: Sun, 22 Mar 2026 23:58:03 -0300 Subject: [PATCH] Add BioPolymer methods and tests Add #coordinates to BioSequence --- .../BaselineOfBioSmalltalk.class.st | 4 +- .../BioPhysics-Tests/BioPolymerTest.class.st | 737 ++++++++++++++++++ repository/BioPhysics-Tests/package.st | 1 + repository/BioPhysics/BioPolymer.class.st | 501 ++++++++++++ repository/BioPhysics/package.st | 1 + .../BioTools/BioGtUnixSubprocess.class.st | 5 +- repository/BioTools/BioSequence.class.st | 13 +- 7 files changed, 1258 insertions(+), 4 deletions(-) create mode 100644 repository/BioPhysics-Tests/BioPolymerTest.class.st create mode 100644 repository/BioPhysics-Tests/package.st create mode 100644 repository/BioPhysics/BioPolymer.class.st create mode 100644 repository/BioPhysics/package.st diff --git a/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st b/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st index 8d34a0c2..f0a57868 100644 --- a/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st +++ b/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st @@ -66,6 +66,7 @@ BaselineOfBioSmalltalk >> baselineBasicGroup: spec [ group: 'Basic' with: #( 'BioTools' + 'BioPhysics' 'BioPharoCommon' 'BioNCBI' 'BioBlast' @@ -112,7 +113,8 @@ BaselineOfBioSmalltalk >> baselineCommonPackages: spec [ includes: #('BioPharoCommon') ]; package: 'BioToolsSamples' with: [ spec requires: #('BioTools' 'BioEntrez' 'BioParsers' ). ]; package: 'BioTools-Tests' with: [ spec requires: #('BioTools' ). ]; - package: 'BioWrapperTests' with: [ spec requires: #('BioTools-Tests' ). 
]; + package: 'BioWrapperTests' with: [ spec requires: #('BioTools-Tests' ) ]; + package: 'BioPhysics' with: [ spec requires: #('BioTools') ]; package: 'BioWrappers' with: [ spec requires: #('BioTools' ) ] ] diff --git a/repository/BioPhysics-Tests/BioPolymerTest.class.st b/repository/BioPhysics-Tests/BioPolymerTest.class.st new file mode 100644 index 00000000..3c2c0c7d --- /dev/null +++ b/repository/BioPhysics-Tests/BioPolymerTest.class.st @@ -0,0 +1,737 @@ +Class { + #name : 'BioPolymerTest', + #superclass : 'TestCase', + #category : 'BioPhysics-Tests', + #package : 'BioPhysics-Tests' +} + +{ #category : 'instance creation' } +BioPolymerTest >> newDNA: aString [ + ^ BioPolymer newDNA: aString +] + +{ #category : 'instance creation' } +BioPolymerTest >> newPolymerWithCoordinates: points sequence: seq [ + ^ BioPolymer new + sequence: seq; + coordinates: points; + yourself +] + +{ #category : 'instance creation' } +BioPolymerTest >> newProtein: aString [ + ^ BioPolymer newProtein: aString +] + +{ #category : 'tests' } +BioPolymerTest >> testAliphaticIndexDeterministic [ + | p a b | + p := self newProtein: 'AVILAVIL'. + a := p aliphaticIndex. + b := p aliphaticIndex. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testAliphaticIndexHighForAVLIRich [ + | p ai | + p := self newProtein: 'AAAAVVVVLLLLIIII'. + ai := p aliphaticIndex. + self assert: ai > 100.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testAliphaticIndexLowerWithoutAliphaticResidues [ + | p ai | + p := self newProtein: 'DEKRSTNQ'. + ai := p aliphaticIndex. + self assert: ai equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testAliphaticIndexReasonableBound [ + | p ai | + p := self newProtein: 'AVIL'. + ai := p aliphaticIndex. + self assert: ai > 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testAliphaticIndexZeroOnEmpty [ + | p | + p := self newProtein: ''. 
+ self assert: p aliphaticIndex equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testBranchingFactorDeterministic [ + | p a b | + p := self newProtein: 'AAAA'. + a := p branchingFactor. + b := p branchingFactor. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testBranchingFactorNoDataZero [ + | p | + p := self newProtein: 'AAAA'. + self assert: p branchingFactor equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testBranchingFactorNonNegativeByDefault [ + | p | + p := self newProtein: 'AAAA'. + self assert: p branchingFactor >= 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testBranchingFactorNumeric [ + | p | + p := self newProtein: 'AAAA'. + self assert: p branchingFactor isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testBranchingFactorZeroWhenEmptySequence [ + | p | + p := self newProtein: ''. + self assert: p branchingFactor equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testChargeAtPhAcidicProteinNegativeAtNeutral [ + | p | + p := self newProtein: 'DDDEEE'. + self assert: (p chargeAtPh: 7.0) < 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testChargeAtPhBasicProteinPositiveAtNeutral [ + | p | + p := self newProtein: 'KKKRRH'. + self assert: (p chargeAtPh: 7.0) > 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testChargeAtPhMonotonicDecreaseWithPh [ + | p low high | + p := self newProtein: 'ACDEHKR'. + low := p chargeAtPh: 3.0. + high := p chargeAtPh: 11.0. + self assert: low > high +] + +{ #category : 'tests' } +BioPolymerTest >> testChargeAtPhNonProteinZero [ + | p | + p := self newDNA: 'ATGC'. + self assert: (p chargeAtPh: 7.0) closeTo: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testChargeAtPhUsesTerminalPka [ + | p charge | + p := self newProtein: 'A'. + charge := p chargeAtPh: 7.0. + self assert: charge isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testClassDefaultPkaValuesContainsExpectedEntries [ + | t | + t := BioPolymer defaultPkaValues. 
+ #($D $E $C $Y $H $K $R #nTerm #cTerm) do: [ :k | self assert: (t includesKey: k) ] +] + +{ #category : 'tests' } +BioPolymerTest >> testClassKyteDoolittleScaleContains20AminoAcids [ + | t | + t := BioPolymer kyteDoolittleScale. + self assert: t size >= 20 +] + +{ #category : 'tests' } +BioPolymerTest >> testClassNearestNeighborInitiationForReturnsPair [ + | pair | + pair := BioPolymer nearestNeighborInitiationFor: 'ATGC'. + self assert: pair size equals: 2. + self assert: pair first isNumber. + self assert: pair second isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testClassNearestNeighborThermoTableHasCorePairs [ + | t | + t := BioPolymer nearestNeighborThermoTable. + #('AA' 'AT' 'TA' 'CG' 'GC' 'GG') do: [ :k | self assert: (t includesKey: k) ] +] + +{ #category : 'tests' } +BioPolymerTest >> testClassNucleotideExtinctionCoefficientsContainsCanonical [ + | t | + t := BioPolymer nucleotideExtinctionCoefficients. + #($A $C $G $T $U) do: [ :k | self assert: (t includesKey: k) ] +] + +{ #category : 'tests' } +BioPolymerTest >> testContactMapCutoffEffect [ + | p near far | + p := self newPolymerWithCoordinates: { 0@0@0. 5@0@0. 20@0@0 } sequence: 'AAA'. + near := p contactMapWithin: 8.0. + far := p contactMapWithin: 4.0. + self assert: ((near at: 1) at: 2) > ((far at: 1) at: 2) +] + +{ #category : 'tests' } +BioPolymerTest >> testContactMapDiagonalTrue [ + | p map | + p := self newPolymerWithCoordinates: { 0@0@0. 1@0@0 } sequence: 'AA'. + map := p contactMap. + self assert: ((map at: 1) at: 1) equals: 1. + self assert: ((map at: 2) at: 2) equals: 1 +] + +{ #category : 'tests' } +BioPolymerTest >> testContactMapEmptyWithoutCoordinates [ + | p | + p := self newProtein: 'AAA'. + self assert: p contactMap isEmpty +] + +{ #category : 'tests' } +BioPolymerTest >> testContactMapSquareDimensions [ + | p map | + p := self newPolymerWithCoordinates: { 0@0@0. 1@0@0. 2@0@0 } sequence: 'AAA'. + map := p contactMap. + self assert: map size equals: 3. 
+ self assert: (map first size) equals: 3 +] + +{ #category : 'tests' } +BioPolymerTest >> testContactMapSymmetric [ + | p map | + p := self newPolymerWithCoordinates: { 0@0@0. 7@0@0. 20@0@0 } sequence: 'AAA'. + map := p contactMap. + self assert: ((map at: 1) at: 2) equals: ((map at: 2) at: 1) +] + +{ #category : 'tests' } +BioPolymerTest >> testExtinctionCoefficientDNAUsesNucleotideSum [ + | p epsilon | + p := self newDNA: 'ACGT'. + epsilon := p extinctionCoefficient. + self assert: epsilon equals: (15400 + 7400 + 11500 + 8700) +] + +{ #category : 'tests' } +BioPolymerTest >> testExtinctionCoefficientDeterministic [ + | p a b | + p := self newDNA: 'ACGTACGT'. + a := p extinctionCoefficient. + b := p extinctionCoefficient. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testExtinctionCoefficientProteinDominatesProteinLike [ + | p | + p := self newProtein: 'W'. + self assert: p extinctionCoefficient equals: 5500 +] + +{ #category : 'tests' } +BioPolymerTest >> testExtinctionCoefficientProteinUsesWYCCystineRule [ + | p epsilon | + p := self newProtein: 'WWYYCC'. + epsilon := p extinctionCoefficient. + self assert: epsilon equals: ((2 * 5500) + (2 * 1490) + (1 * 125)) +] + +{ #category : 'tests' } +BioPolymerTest >> testExtinctionCoefficientUnknownPolymerZero [ + | p | + p := BioPolymer new sequence: 'XYZ'; yourself. + self assert: p extinctionCoefficient equals: 0 +] + +{ #category : 'tests' } +BioPolymerTest >> testGravyHydrophilicNegative [ + | p | + p := self newProtein: 'DEKRDEKR'. + self assert: p gravy < 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testGravyHydrophobicPositive [ + | p | + p := self newProtein: 'IVLIVL'. + self assert: p gravy > 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testGravyIgnoresUnknownResiduesViaFilter [ + | p g | + p := BioPolymer new sequence: 'ACDXZ'; yourself. + g := p gravy. 
+ self assert: g isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testGravyMixedNearReasonableRange [ + | p g | + p := self newProtein: 'ACDEFGHIKLMNPQRSTVWY'. + g := p gravy. + self assert: (g between: -5.0 and: 5.0) +] + +{ #category : 'tests' } +BioPolymerTest >> testGravyNonProteinZero [ + | p | + p := self newDNA: 'ATGC'. + self assert: p gravy closeTo: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testInstabilityIndexDeterministic [ + | p a b | + p := self newProtein: 'MKWVTFISLLFLFSSAYS'. + a := p instabilityIndex. + b := p instabilityIndex. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testInstabilityIndexLongerSequenceNotNaN [ + | p val | + p := self newProtein: 'ACDEFGHIKLMNPQRSTVWYACDEFGHIK'. + val := p instabilityIndex. + self assert: val isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testInstabilityIndexNonProteinZero [ + | p | + p := self newDNA: 'ATGC'. + self assert: p instabilityIndex equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testInstabilityIndexShortProteinZero [ + | p | + p := self newProtein: 'A'. + self assert: p instabilityIndex equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testInstabilityIndexUsesDipeptideWeights [ + | p val | + p := self newProtein: 'AAVV'. + val := p instabilityIndex. + self assert: val isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testIsoelectricPointBasicProteinHigherThanAcidic [ + | basic acidic | + basic := self newProtein: 'KKKKRRRRHH'. + acidic := self newProtein: 'DDDDEEEE'. + self assert: basic isoelectricPoint > acidic isoelectricPoint +] + +{ #category : 'tests' } +BioPolymerTest >> testIsoelectricPointConvergenceStable [ + | p a b | + p := self newProtein: 'MTEITAAMVKELRESTGAGM'. + a := p isoelectricPoint. + b := p isoelectricPoint. + self assert: (a - b) abs < 1e-4 +] + +{ #category : 'tests' } +BioPolymerTest >> testIsoelectricPointInRange [ + | p pi | + p := self newProtein: 'ACDEFGHIKLMNPQRSTVWY'. 
+ pi := p isoelectricPoint. + self assert: (pi between: 0.0 and: 14.0) +] + +{ #category : 'tests' } +BioPolymerTest >> testIsoelectricPointNonProteinNeutralFallback [ + | p | + p := self newDNA: 'ATGCATGC'. + self assert: p isoelectricPoint equals: 7.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testIsoelectricPointProteinLikeDefault [ + | p | + p := self newProtein: 'MKWVTFISLL'. + self assert: p isoelectricPoint isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testMeltingTempEmpty [ + | p | + p := self newDNA: ''. + self assert: p meltingTemp equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMeltingTempLongSequencePositive [ + | p tm | + p := self newDNA: 'ATGCGCGATATCGCGC'. + tm := p meltingTemp. + self assert: tm isNumber. + self assert: tm > 20.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMeltingTempRejectsProtein [ + | p | + p := self newProtein: 'MKWVTFISLL'. + self assert: p meltingTemp equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMeltingTempShortWallaceCase1 [ + | p | + p := self newDNA: 'ATGC'. + self assert: p meltingTemp equals: 12.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMeltingTempShortWallaceCase2 [ + | p | + p := self newDNA: 'AAAA'. + self assert: p meltingTemp equals: 8.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMolarAbsorptionDeterministic [ + | p a b | + p := self newDNA: 'ATGCGC'. + a := p molarAbsorption. + b := p molarAbsorption. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testMolarAbsorptionEmptyZero [ + | p | + p := self newDNA: ''. + self assert: p molarAbsorption equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMolarAbsorptionEqualsExtinctionForDefaultPathAndConc [ + | p | + p := self newDNA: 'ACGT'. + self assert: p molarAbsorption equals: p extinctionCoefficient asFloat +] + +{ #category : 'tests' } +BioPolymerTest >> testMolarAbsorptionNonNucleicZero [ + | p | + p := self newProtein: 'MKWVTF'. 
+ self assert: p molarAbsorption equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMolarAbsorptionScalesWithLength [ + | short long | + short := self newDNA: 'AC'. + long := self newDNA: 'ACAC'. + self assert: long molarAbsorption > short molarAbsorption +] + +{ #category : 'tests' } +BioPolymerTest >> testMonomerFrequenciesCaseInsensitive [ + | p freq | + p := BioPolymer new sequence: 'AaAa'; yourself. + freq := p monomerFrequencies. + self assert: (freq at: $A) equals: 1.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMonomerFrequenciesDistribution [ + | p freq | + p := self newDNA: 'AAGT'. + freq := p monomerFrequencies. + self assert: (freq at: $A) equals: 0.5. + self assert: (freq at: $G) equals: 0.25. + self assert: (freq at: $T) equals: 0.25 +] + +{ #category : 'tests' } +BioPolymerTest >> testMonomerFrequenciesEmpty [ + | p | + p := self newProtein: ''. + self assert: p monomerFrequencies isEmpty +] + +{ #category : 'tests' } +BioPolymerTest >> testMonomerFrequenciesSingleMonomer [ + | p freq | + p := self newDNA: 'AAAA'. + freq := p monomerFrequencies. + self assert: freq size equals: 1. + self assert: (freq at: $A) equals: 1.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testMonomerFrequenciesSumToOne [ + | p freq sum | + p := self newProtein: 'AABB'. + freq := p monomerFrequencies. + sum := (freq values inject: 0.0 into: [ :acc :v | acc + v ]). + self assert: (sum - 1.0) abs < 1e-9 +] + +{ #category : 'tests' } +BioPolymerTest >> testPkaValuesDeterministic [ + | p a b | + p := self newProtein: 'ACD'. + a := p pkaValues. + b := p pkaValues. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testPkaValuesIncludesSideChains [ + | p table | + p := self newProtein: 'ACDEHKRY'. + table := p pkaValues. + #($D $E $C $Y $H $K $R) do: [ :aa | self assert: (table includesKey: aa) ] +] + +{ #category : 'tests' } +BioPolymerTest >> testPkaValuesIncludesTermini [ + | p table | + p := self newProtein: 'A'. 
+ table := p pkaValues. + self assert: (table includesKey: #nTerm). + self assert: (table includesKey: #cTerm) +] + +{ #category : 'tests' } +BioPolymerTest >> testPkaValuesNumeric [ + | p table | + p := self newProtein: 'ACD'. + table := p pkaValues. + self assert: (table at: $D) isNumber +] + +{ #category : 'tests' } +BioPolymerTest >> testPkaValuesReturnsCopy [ + | p a b | + p := self newProtein: 'A'. + a := p pkaValues. + b := p pkaValues. + a at: $D put: 99. + self deny: (b at: $D) equals: 99 +] + +{ #category : 'tests' } +BioPolymerTest >> testPostTranslationalModificationsCollectionType [ + | p | + p := self newProtein: 'AAAA'. + self assert: p postTranslationalModifications isCollection +] + +{ #category : 'tests' } +BioPolymerTest >> testPostTranslationalModificationsDefaultsEmpty [ + | p | + p := self newProtein: 'AAAA'. + self assertEmpty: p postTranslationalModifications +] + +{ #category : 'tests' } +BioPolymerTest >> testPostTranslationalModificationsDeterministic [ + | p a b | + p := self newProtein: 'AAAA'. + a := p postTranslationalModifications. + b := p postTranslationalModifications. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testPostTranslationalModificationsIndependentFromSequenceType [ + | p | + p := self newDNA: 'ATGC'. + self assert: p postTranslationalModifications isCollection +] + +{ #category : 'tests' } +BioPolymerTest >> testPostTranslationalModificationsReturnsCopy [ + | p a b | + p := self newProtein: 'AAAA'. + a := p postTranslationalModifications. + b := p postTranslationalModifications. + a add: #phospho. + self deny: (b includes: #phospho) +] + +{ #category : 'tests' } +BioPolymerTest >> testProteolysisSitesNonProteinEmpty [ + | p | + p := self newDNA: 'ATGC'. + self assertEmpty: (p proteolysisSites: 'trypsin') +] + +{ #category : 'tests' } +BioPolymerTest >> testProteolysisSitesPepsinAromatics [ + | p sites | + p := self newProtein: 'AFLWY'. + sites := p proteolysisSites: 'pepsin'. 
+ self assert: sites notEmpty +] + +{ #category : 'tests' } +BioPolymerTest >> testProteolysisSitesTrypsinBlockedByProline [ + | p sites | + p := self newProtein: 'AKP'. + sites := p proteolysisSites: 'trypsin'. + self deny: (sites includes: 2) +] + +{ #category : 'tests' } +BioPolymerTest >> testProteolysisSitesTrypsinCutsAfterKROnly [ + | p sites | + p := self newProtein: 'AKRPQK'. + sites := p proteolysisSites: 'trypsin'. + self assert: (sites includes: 2). + self deny: (sites includes: 3) +] + +{ #category : 'tests' } +BioPolymerTest >> testProteolysisSitesUnknownEnzymeEmpty [ + | p | + p := self newProtein: 'AKRFLWY'. + self assert: (p proteolysisSites: 'unknown') isEmpty +] + +{ #category : 'tests' } +BioPolymerTest >> testRadiusOfGyrationDeterministic [ + | p a b | + p := self newPolymerWithCoordinates: { 0@0@0. 1@1@1. 2@2@2 } sequence: 'AAA'. + a := p radiusOfGyration. + b := p radiusOfGyration. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testRadiusOfGyrationSinglePointZero [ + | p | + p := self newPolymerWithCoordinates: { 0@0@0 } sequence: 'A'. + self assert: p radiusOfGyration equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testRadiusOfGyrationTranslationInvariant [ + | p1 p2 | + p1 := self newPolymerWithCoordinates: { 0@0@0. 1@0@0. 0@1@0 } sequence: 'AAA'. + p2 := self newPolymerWithCoordinates: { 10@10@10. 11@10@10. 10@11@10 } sequence: 'AAA'. + self assert: (p1 radiusOfGyration - p2 radiusOfGyration) abs < 1e-9 +] + +{ #category : 'tests' } +BioPolymerTest >> testRadiusOfGyrationTwoPointsPositive [ + | p rg | + p := self newPolymerWithCoordinates: { 0@0@0. 2@0@0 } sequence: 'AA'. + rg := p radiusOfGyration. + self assert: rg > 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testRadiusOfGyrationZeroWithoutCoordinates [ + | p | + p := self newProtein: 'AAAA'. 
+ self assert: p radiusOfGyration equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testSecondaryStructureFractionDeterministic [ + | p a b | + p := self newProtein: 'AAAA'. + a := p secondaryStructureFraction. + b := p secondaryStructureFraction. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testSecondaryStructureFractionFallbackHasAllKeys [ + | p frac | + p := self newProtein: 'AAAA'. + frac := p secondaryStructureFraction. + self assert: (frac includesKey: #helix). + self assert: (frac includesKey: #sheet). + self assert: (frac includesKey: #turn) +] + +{ #category : 'tests' } +BioPolymerTest >> testSecondaryStructureFractionFallbackZeros [ + | p frac | + p := self newProtein: 'AAAA'. + frac := p secondaryStructureFraction. + self assert: (frac at: #helix) closeTo: 1.0. + self assert: (frac at: #sheet) closeTo: 0.0. + self assert: (frac at: #turn) closeTo: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testSecondaryStructureFractionHandlesEmpty [ + | p frac | + p := self newProtein: ''. + frac := p secondaryStructureFraction. + self assert: (frac at: #helix) equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testSecondaryStructureFractionSumsAtMostOne [ + | p frac sum | + p := self newProtein: 'AAAA'. + frac := p secondaryStructureFraction. + sum := (frac at: #helix) + (frac at: #sheet) + (frac at: #turn). + self assert: sum <= 1.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testSolventAccessibleSurfaceAreaDeterministic [ + | p a b | + p := self newProtein: 'MKWVTF'. + a := p solventAccessibleSurfaceArea. + b := p solventAccessibleSurfaceArea. + self assert: a equals: b +] + +{ #category : 'tests' } +BioPolymerTest >> testSolventAccessibleSurfaceAreaEmptyProteinZero [ + | p | + p := self newProtein: ''. + self assert: p solventAccessibleSurfaceArea equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testSolventAccessibleSurfaceAreaNonProteinZero [ + | p | + p := self newDNA: 'ATGC'. 
+ self assert: p solventAccessibleSurfaceArea equals: 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testSolventAccessibleSurfaceAreaProteinPositive [ + | p sasa | + p := self newProtein: 'ACDE'. + sasa := p solventAccessibleSurfaceArea. + self assert: sasa > 0.0 +] + +{ #category : 'tests' } +BioPolymerTest >> testSolventAccessibleSurfaceAreaScalesWithLength [ + | a b | + a := self newProtein: 'AAAA'. + b := self newProtein: 'AAAAAAAA'. + self assert: b solventAccessibleSurfaceArea > a solventAccessibleSurfaceArea +] diff --git a/repository/BioPhysics-Tests/package.st b/repository/BioPhysics-Tests/package.st new file mode 100644 index 00000000..83af4162 --- /dev/null +++ b/repository/BioPhysics-Tests/package.st @@ -0,0 +1 @@ +Package { #name : 'BioPhysics-Tests' } diff --git a/repository/BioPhysics/BioPolymer.class.st b/repository/BioPhysics/BioPolymer.class.st new file mode 100644 index 00000000..6460022d --- /dev/null +++ b/repository/BioPhysics/BioPolymer.class.st @@ -0,0 +1,501 @@ +" +BioPolymer extends BioSequence with physical chemistry, thermodynamic, and structure-oriented analyses for proteins, nucleic acids, and specialized biopolymers. +" +Class { + #name : 'BioPolymer', + #superclass : 'BioSequence', + #instVars : [ + 'monomerMasses', + 'pkaTable', + 'modifications', + 'enzymeCleavageRules', + 'branchingData' + ], + #category : 'BioPhysics', + #package : 'BioPhysics' +} + +{ #category : 'tables' } +BioPolymer class >> defaultPkaValues [ + ^ Dictionary newFromPairs: { + $D . 3.9. + $E . 4.1. + $C . 8.3. + $Y . 10.1. + $H . 6.0. + $K . 10.5. + $R . 12.5. + #nTerm . 9.69. + #cTerm . 2.34 } +] + +{ #category : 'tables' } +BioPolymer class >> instabilityWeights [ + "Complete 400-entry Guruprasad table should live here. + This compact version includes common values and defaults missing pairs to 0.0 in callers." + ^ Dictionary newFromPairs: { + 'AA' . 1.0. 'AC' . 1.2. 'AD' . -1.0. 'AE' . -1.0. 'AF' . -2.0. 'AG' . -0.4. 'AH' . -1.3. 'AI' . -1.8. 'AK' . 
-1.5. 'AL' . -1.8. + 'AM' . -1.3. 'AN' . -0.8. 'AP' . 0.3. 'AQ' . -0.7. 'AR' . -1.2. 'AS' . 0.6. 'AT' . 0.4. 'AV' . 0.5. 'AW' . -2.1. 'AY' . -1.9. + 'CA' . -0.2. 'CC' . 0.1. 'CD' . -1.4. 'CE' . -1.3. 'CF' . -1.6. 'CG' . -0.7. 'CH' . -1.1. 'CI' . -1.4. 'CK' . -1.0. 'CL' . -1.5. + 'CM' . -1.0. 'CN' . -0.8. 'CP' . 0.2. 'CQ' . -0.7. 'CR' . -1.0. 'CS' . 0.5. 'CT' . 0.3. 'CV' . 0.4. 'CW' . -1.8. 'CY' . -1.5. + 'GA' . 0.8. 'GV' . 0.9. 'VV' . 1.1. 'LL' . 1.1. 'FW' . 1.3. 'WY' . 1.2. 'KR' . 0.9. 'RK' . 0.9 } +] + +{ #category : 'tables' } +BioPolymer class >> kyteDoolittleScale [ + ^ Dictionary newFromPairs: { + $A . 1.8. $R . -4.5. $N . -3.5. $D . -3.5. $C . 2.5. + $Q . -3.5. $E . -3.5. $G . -0.4. $H . -3.2. $I . 4.5. + $L . 3.8. $K . -3.9. $M . 1.9. $F . 2.8. $P . -1.6. + $S . -0.8. $T . -0.7. $W . -0.9. $Y . -1.3. $V . 4.2 } +] + +{ #category : 'tables' } +BioPolymer class >> maxAccessibleSurfaceByResidue [ + ^ Dictionary newFromPairs: { + $A . 121.0. $R . 265.0. $N . 187.0. $D . 187.0. $C . 148.0. + $Q . 214.0. $E . 214.0. $G . 97.0. $H . 216.0. $I . 195.0. + $L . 191.0. $K . 230.0. $M . 203.0. $F . 228.0. $P . 154.0. + $S . 143.0. $T . 163.0. $W . 264.0. $Y . 255.0. $V . 165.0 } +] + +{ #category : 'as yet unclassified' } +BioPolymer class >> nearestNeighborInitiationFor: aSequenceString [ + | first last atEnds gcEnds | + aSequenceString isEmpty ifTrue: [ ^ #(0.0 0.0) ]. + first := aSequenceString first. + last := aSequenceString last. + atEnds := ((first = $A or: [ first = $T or: [ first = $U ] ]) ifTrue: [ 1 ] ifFalse: [ 0 ]) + + ((last = $A or: [ last = $T or: [ last = $U ] ]) ifTrue: [ 1 ] ifFalse: [ 0 ]). + gcEnds := 2 - atEnds. + ^ { (2.3 * atEnds) + (0.1 * gcEnds). ((4.1 * atEnds) - (2.8 * gcEnds)) } +] + +{ #category : 'as yet unclassified' } +BioPolymer class >> nearestNeighborThermoTable [ + "ΔH (kcal/mol), ΔS (cal/mol/K) for DNA NN dimers (approx SantaLucia set)." + ^ Dictionary newFromPairs: { + 'AA' . #(-7.9 -22.2). 'TT' . #(-7.9 -22.2). + 'AT' . 
#(-7.2 -20.4). 'TA' . #(-7.2 -21.3). + 'CA' . #(-8.5 -22.7). 'TG' . #(-8.5 -22.7). + 'GT' . #(-8.4 -22.4). 'AC' . #(-8.4 -22.4). + 'CT' . #(-7.8 -21.0). 'AG' . #(-7.8 -21.0). + 'GA' . #(-8.2 -22.2). 'TC' . #(-8.2 -22.2). + 'CG' . #(-10.6 -27.2). + 'GC' . #(-9.8 -24.4). + 'GG' . #(-8.0 -19.9). 'CC' . #(-8.0 -19.9) } +] + +{ #category : 'tables' } +BioPolymer class >> nucleotideExtinctionCoefficients [ + "Nearest-neighbor is more accurate; these are practical single-base coefficients." + ^ Dictionary newFromPairs: { + $A . 15400. + $C . 7400. + $G . 11500. + $T . 8700. + $U . 9900. + $N . 0 } +] + +{ #category : 'physical-thermodynamics' } +BioPolymer >> aliphaticIndex [ + "Answer the relative volume occupied by aliphatic side chains (Alanine, Valine, Isoleucine, and Leucine)." + | counts total a v i l | + total := self size asFloat. + total = 0 ifTrue: [ ^ 0.0 ]. + counts := self monomerCounts. + a := (counts at: $A ifAbsent: [ 0 ]) asFloat. + v := (counts at: $V ifAbsent: [ 0 ]) asFloat. + i := (counts at: $I ifAbsent: [ 0 ]) asFloat. + l := (counts at: $L ifAbsent: [ 0 ]) asFloat. + ^ ((a / total) * 100.0) + + ((v / total) * 100.0 * 2.9) + + (((i + l) / total) * 100.0 * 3.9) +] + +{ #category : 'specialized-interactions' } +BioPolymer >> branchingFactor [ + "Simple branching score for glycans/polymers with explicit branching metadata. + Returns 0 when no branching information is available." + branchingData ifNil: [ ^ 0.0 ]. + (branchingData includesKey: #branches) ifFalse: [ ^ 0.0 ]. + (self size = 0) ifTrue: [ ^ 0.0 ]. + ^ ((branchingData at: #branches) asFloat / self size asFloat) +] + +{ #category : 'physical-thermodynamics' } +BioPolymer >> chargeAtPh: ph [ + | pka net counts nTerm cTerm | + self isProteinSequence + ifFalse: [ ^ 0.0 ]. + counts := self monomerCounts. + pka := self pkaValues. + nTerm := pka at: #nTerm. + cTerm := pka at: #cTerm. + net := 0.0. + + "Termini" + net := net + (1.0 / (1.0 + (10.0 raisedTo: (ph - nTerm)))). 
+ net := net - (1.0 / (1.0 + (10.0 raisedTo: (cTerm - ph)))). + + "Basic side chains" + net := net + + ((counts at: $K ifAbsent: [ 0 ]) asFloat * (1.0 / (1.0 + (10.0 raisedTo: (ph - (pka at: $K)))))) + + ((counts at: $R ifAbsent: [ 0 ]) asFloat * (1.0 / (1.0 + (10.0 raisedTo: (ph - (pka at: $R)))))) + + ((counts at: $H ifAbsent: [ 0 ]) asFloat * (1.0 / (1.0 + (10.0 raisedTo: (ph - (pka at: $H)))))). + + "Acidic side chains" + net := net + - ((counts at: $D ifAbsent: [ 0 ]) asFloat * (1.0 / (1.0 + (10.0 raisedTo: ((pka at: $D) - ph))))) + - ((counts at: $E ifAbsent: [ 0 ]) asFloat * (1.0 / (1.0 + (10.0 raisedTo: ((pka at: $E) - ph))))) + - ((counts at: $C ifAbsent: [ 0 ]) asFloat * (1.0 / (1.0 + (10.0 raisedTo: ((pka at: $C) - ph))))) + - ((counts at: $Y ifAbsent: [ 0 ]) asFloat * (1.0 / (1.0 + (10.0 raisedTo: ((pka at: $Y) - ph))))). + ^ net +] + +{ #category : 'structural-geometric' } +BioPolymer >> contactMap [ + "Generates a matrix representing which monomers are within a specific distance of each other in 3D space" + ^ self contactMapWithin: 8.0 +] + +{ #category : 'structural-geometric' } +BioPolymer >> contactMapWithin: cutoff [ + | pts n map cutoff2 | + pts := self coordinates. + n := pts size. + n = 0 ifTrue: [ ^ #() ]. + map := Array new: n. + 1 to: n do: [ :i | map at: i put: (ByteArray new: n withAll: 0) ]. + cutoff2 := cutoff * cutoff. + 1 to: n do: [ :i | + i to: n do: [ :j | + | pi pj d2 contact | + pi := pts at: i. + pj := pts at: j. + d2 := (pi x - pj x) squared + (pi y - pj y) squared + (pi z - pj z) squared. + contact := d2 <= cutoff2 ifTrue: [ 1 ] ifFalse: [ 0 ]. + (map at: i) at: j put: contact. + (map at: j) at: i put: contact ] ]. + ^ map +] + +{ #category : 'specialized-interactions' } +BioPolymer >> effectivePkaValues [ + ^ pkaTable ifNil: [ self class defaultPkaValues copy ] +] + +{ #category : 'physical-thermodynamics' } +BioPolymer >> extinctionCoefficient [ + self isProteinSequence ifTrue: [ ^ self proteinExtinctionCoefficient ]. 
+ (self isDNASequence) ifTrue: [ ^ self nucleicAcidExtinctionCoefficient ]. + ^ 0 +] + +{ #category : 'chemical-compositional' } +BioPolymer >> gravy [ + "(Grand Average of Hydropathy) Calculates the sum of hydropathy values of all amino acids divided by the sequence length." + | scale total len | + self isProteinSequence + ifFalse: [ ^ 0.0 ]. + scale := self class kyteDoolittleScale. + total := 0.0. + len := 0. + self sequence do: [ :ch | + | aa | + aa := ch asUppercase. + (scale includesKey: aa) + ifTrue: [ + total := total + (scale at: aa). + len := len + 1 ] ]. + len = 0 ifTrue: [ ^ 0.0 ]. + ^ total / len asFloat +] + +{ #category : 'coordinates' } +BioPolymer >> importCoordinatesFrom: aStructureLikeObject [ + "Adapter hook for structure objects. + Expected protocol (best effort): + - #alphaCarbonCoordinates + - or #coordinates + Coordinates should answer #x #y #z." + (aStructureLikeObject respondsTo: #alphaCarbonCoordinates) + ifTrue: [ ^ self coordinates: (aStructureLikeObject perform: #alphaCarbonCoordinates) ]. + (aStructureLikeObject respondsTo: #coordinates) + ifTrue: [ ^ self coordinates: (aStructureLikeObject perform: #coordinates) ]. + self error: 'Object does not expose recognizable coordinate protocol' +] + +{ #category : 'chemical-compositional' } +BioPolymer >> instabilityIndex [ + "Predicts the stability of a protein in a test tube (values $>40$ usually indicate instability)." + | dipeptideWeights sum len | + self isProteinSequence + ifFalse: [ ^ 0.0 ]. + len := self size. + len < 2 ifTrue: [ ^ 0.0 ]. + dipeptideWeights := self class instabilityWeights. + sum := 0.0. + 1 to: len - 1 do: [ :i | + | pair | + pair := ((self sequence at: i) asUppercase asString), ((self sequence at: i + 1) asUppercase asString). + sum := sum + (dipeptideWeights at: pair ifAbsent: [ 0.0 ]) ]. 
+	^ (10.0 * sum) / len asFloat
+]
+
+{ #category : 'physical-thermodynamics' }
+BioPolymer >> isoelectricPoint [
+	| low high mid charge iter | "Bisection over pH 0.0-14.0 for the point where #chargeAtPh: changes sign."
+	self isProteinSequence ifFalse: [ ^ 7.0 ]. "non-protein receivers answer a neutral 7.0"
+	low := 0.0.
+	high := 14.0.
+	iter := 0.
+	[ (high - low) > 1e-4 and: [ iter < 256 ] ] whileTrue: [
+		mid := (low + high) / 2.0.
+		charge := self chargeAtPh: mid.
+		charge > 0
+			ifTrue: [ low := mid ]
+			ifFalse: [ high := mid ].
+		iter := iter + 1 ].
+	^ (low + high) / 2.0
+]
+
+{ #category : 'physical-thermodynamics' }
+BioPolymer >> meltingTemp [
+	"Nearest-neighbor melting temperature in Celsius (SantaLucia-style approximation).
+	Answers 0.0 for non-DNA or empty receivers; oligos shorter than 8 nt use the Wallace rule 2(A+T) + 4(G+C).
+	Assumptions:
+	- self is single strand sequence represented 5'->3'
+	- default total strand concentration = 250 nM
+	- monovalent salt concentration = 50 mM
+	Dimers missing from the nearest-neighbor table contribute 0.0 to both dH and dS."
+	| bases len dH dS nnTable initiation entropySaltCorrection r gasConstant ct tmKelvin |
+	self isDNASequence ifFalse: [ ^ 0.0 ].
+	bases := self sequence asUppercase. "method-local uppercase copy, so this query never writes to the receiver"
+	len := bases size.
+	len = 0 ifTrue: [ ^ 0.0 ].
+	len < 8 ifTrue: [
+		| counts a t g c |
+		counts := self monomerCounts.
+		a := counts at: $A ifAbsent: [ 0 ].
+		t := (counts at: $T ifAbsent: [ 0 ]) + (counts at: $U ifAbsent: [ 0 ]).
+		g := counts at: $G ifAbsent: [ 0 ].
+		c := counts at: $C ifAbsent: [ 0 ].
+		^ ((2 * (a + t)) + (4 * (g + c))) asFloat ].
+
+	nnTable := self class nearestNeighborThermoTable.
+	dH := 0.0.
+	dS := 0.0.
+	1 to: len - 1 do: [ :i |
+		| pair thermo |
+		pair := bases copyFrom: i to: i + 1.
+		thermo := nnTable at: pair ifAbsent: [ #(0.0 0.0) ].
+		dH := dH + thermo first.
+		dS := dS + thermo second ].
+
+	"Initiation correction"
+	initiation := self class nearestNeighborInitiationFor: bases.
+	dH := dH + initiation first.
+	dS := dS + initiation second.
+
+	"Salt correction on entropy"
+	r := 0.05. "[Na+] M"
+	entropySaltCorrection := 0.368 * (len - 1) * (r ln).
+	dS := dS + entropySaltCorrection.
+
+	"Tm = (1000*ΔH)/(ΔS + R ln(Ct/4)) - 273.15"
+	gasConstant := 1.987. "cal/(K mol)"
+	ct := 2.5e-7. "M"
+	tmKelvin := (1000.0 * dH) / (dS + (gasConstant * (ct / 4.0) ln)).
+	^ tmKelvin - 273.15
+]
+
+{ #category : 'physical-thermodynamics' }
+BioPolymer >> meltingTemp2 [
+	"Nucleic acid Tm in Celsius: Wallace rule below 14 nt, otherwise 81.5 + 16.6*log10[Na+] + 0.41*(%GC) - 500/N with [Na+] = 0.05 M. Answers 0.0 for non-DNA or empty receivers."
+	| len counts a t g c salt wallace |
+	self isDNASequence ifFalse: [ ^ 0.0 ].
+	len := self size.
+	len = 0 ifTrue: [ ^ 0.0 ].
+	counts := self monomerCounts.
+	a := counts at: $A ifAbsent: [ 0 ].
+	t := (counts at: $T ifAbsent: [ 0 ]) + (counts at: $U ifAbsent: [ 0 ]).
+	g := counts at: $G ifAbsent: [ 0 ].
+	c := counts at: $C ifAbsent: [ 0 ].
+	len < 14 ifTrue: [
+		wallace := (2 * (a + t)) + (4 * (g + c)).
+		^ wallace asFloat ].
+	salt := 0.05. "[Na+] in mol/L"
+	^ 81.5 + (16.6 * (salt log: 10)) + (0.41 * ((g + c) asFloat / len asFloat * 100.0)) - (500.0 / len asFloat)
+]
+
+{ #category : 'chemical-compositional' }
+BioPolymer >> molarAbsorption [
+	"Absorbance at 260 nm of a DNA receiver for the default 1 M concentration and 1 cm path length; answers 0.0 for non-DNA receivers."
+	| epsilon |
+	self isDNASequence ifFalse: [ ^ 0.0 ].
+	epsilon := self extinctionCoefficient asFloat.
+	epsilon = 0.0 ifTrue: [ ^ 0.0 ].
+	"Default concentration 1 M and path length 1 cm -> A = εcl"
+	^ epsilon
+]
+
+{ #category : 'chemical-compositional' }
+BioPolymer >> monomerCounts [
+	| counts | "Answers a new Dictionary mapping each uppercased monomer to its number of occurrences."
+	counts := Dictionary new.
+	self sequence do: [ :ch |
+		| c |
+		c := ch asUppercase.
+		counts at: c put: ((counts at: c ifAbsent: [ 0 ]) + 1) ].
+	^ counts
+]
+
+{ #category : 'chemical-compositional' }
+BioPolymer >> monomerFrequencies [
+	"Answers a Dictionary mapping each uppercased monomer to its fractional occurrence (count / size, 0.0 to 1.0); empty Dictionary for an empty receiver."
+	| counts total frequencies |
+	total := self size asFloat.
+	total = 0 ifTrue: [ ^ Dictionary new ].
+	counts := self monomerCounts.
+	frequencies := Dictionary new: counts size.
+ counts keysAndValuesDo: [ :k :v | + frequencies at: k put: (v asFloat / total) ]. + ^ frequencies +] + +{ #category : 'physical-thermodynamics' } +BioPolymer >> nucleicAcidExtinctionCoefficient [ + | coeffs total | + coeffs := self class nucleotideExtinctionCoefficients. + total := 0. + self sequence do: [ :ch | + total := total + (coeffs at: ch asUppercase ifAbsent: [ 0 ]) ]. + ^ total +] + +{ #category : 'specialized-interactions' } +BioPolymer >> pkaValues [ + "Returns effective pKa table for current polymer context. + Base values can be overridden with instance-level pkaTable. + Terminal corrections are exposed as symbolic entries." + | values | + values := self effectivePkaValues. + values at: #nTerm put: (values at: #nTerm ifAbsent: [ 9.69 ]). + values at: #cTerm put: (values at: #cTerm ifAbsent: [ 2.34 ]). + ^ values copy +] + +{ #category : 'specialized-interactions' } +BioPolymer >> postTranslationalModifications [ + "(For Proteins) Tracks phosphorylation, glycosylation, or methylation sites." + modifications ifNil: [ ^ OrderedCollection new ]. + ^ modifications copy +] + +{ #category : 'physical-thermodynamics' } +BioPolymer >> proteinExtinctionCoefficient [ + | counts trp tyr cys | + counts := self monomerCounts. + trp := counts at: $W ifAbsent: [ 0 ]. + tyr := counts at: $Y ifAbsent: [ 0 ]. + cys := counts at: $C ifAbsent: [ 0 ]. + ^ (trp * 5500) + (tyr * 1490) + ((cys // 2) * 125) +] + +{ #category : 'specialized-interactions' } +BioPolymer >> proteolysisSites: anEnzyme [ + | enzymeName sites | + self isProteinSequence ifFalse: [ ^ #() ]. + seq := self sequence asUppercase. + enzymeName := anEnzyme asString asLowercase. + sites := OrderedCollection new. + enzymeName = 'trypsin' ifTrue: [ + 1 to: seq size - 1 do: [ :i | + | aa next | + aa := seq at: i. + next := seq at: i + 1. + ((aa = $K or: [ aa = $R ]) and: [ next ~= $P ]) ifTrue: [ sites add: i ] ]. + (seq notEmpty and: [ (seq last = $K) or: [ seq last = $R ] ]) ifTrue: [ sites add: seq size ]. 
+ ^ sites asArray ]. + enzymeName = 'pepsin' ifTrue: [ + 1 to: seq size - 1 do: [ :i | + | aa | + aa := seq at: i. + (#($F $L $W $Y) includes: aa) ifTrue: [ sites add: i ] ]. + ^ sites asArray ]. + ^ #() +] + +{ #category : 'structural-geometric' } +BioPolymer >> radiusOfGyration [ + "Measures the compactness of the polymer chain." + | pts n cx cy cz sum | + pts := self coordinates. + n := pts size asFloat. + n = 0 ifTrue: [ ^ 0.0 ]. + cx := (pts sum: [ :p | p x ]) / n. + cy := (pts sum: [ :p | p y ]) / n. + cz := (pts sum: [ :p | p z ]) / n. + sum := pts sum: [ :p | ((p x - cx) squared + (p y - cy) squared + (p z - cz) squared) ]. + ^ (sum / n) sqrt +] + +{ #category : 'structural-geometric' } +BioPolymer >> secondaryStructure [ + "Very lightweight sequence-based secondary structure propensity assignment. + Returns a String of same length with symbols: + - H: helix-prone + - E: strand-prone + - T: turn/coil-prone + + This is a heuristic fallback when no experimental structure is available." + | helixSet sheetSet turnSet out | + self isProteinSequence ifFalse: [ ^ String new: self size withAll: $T ]. + + helixSet := Set withAll: #($A $L $M $E $K $Q $R $H). + sheetSet := Set withAll: #($V $I $Y $F $W $T $C). + turnSet := Set withAll: #($G $N $P $S $D). + + out := String new: self size. + 1 to: self size do: [ :i | + | aa state | + aa := (self sequence at: i) asUppercase. + state := (helixSet includes: aa) + ifTrue: [ $H ] + ifFalse: [ + (sheetSet includes: aa) + ifTrue: [ $E ] + ifFalse: [ $T ] ]. + out at: i put: state ]. + ^ out +] + +{ #category : 'structural-geometric' } +BioPolymer >> secondaryStructureFraction [ + "Returns the percentage of the sequence predicted or observed to be in Helix, Sheet, or Turn states." + | ss total helix sheet turn | + (self respondsTo: #secondaryStructure) + ifFalse: [ ^ Dictionary newFromPairs: { #helix . 0.0 . #sheet . 0.0 . #turn . 0.0 } ]. + ss := self secondaryStructure. + total := ss size asFloat. 
+ total = 0 ifTrue: [ ^ Dictionary newFromPairs: { #helix . 0.0 . #sheet . 0.0 . #turn . 0.0 } ]. + helix := (ss count: [ :c | c = $H ]) asFloat / total. + sheet := (ss count: [ :c | c = $E ]) asFloat / total. + turn := (ss count: [ :c | c = $T ]) asFloat / total. + ^ Dictionary newFromPairs: { #helix . helix . #sheet . sheet . #turn . turn } +] + +{ #category : 'structural-geometric' } +BioPolymer >> solventAccessibleSurfaceArea [ + "Approximate SASA using per-residue maximal accessible area fractions. + If no structure/relative accessibility is present, returns theoretical maximum." + | maxAsa total | + self isProteinSequence ifFalse: [ ^ 0.0 ]. + maxAsa := self class maxAccessibleSurfaceByResidue. + total := 0.0. + self sequence do: [ :ch | + total := total + (maxAsa at: ch asUppercase ifAbsent: [ 0.0 ]) ]. + ^ total +] diff --git a/repository/BioPhysics/package.st b/repository/BioPhysics/package.st new file mode 100644 index 00000000..4bb1bb39 --- /dev/null +++ b/repository/BioPhysics/package.st @@ -0,0 +1 @@ +Package { #name : 'BioPhysics' } diff --git a/repository/BioTools/BioGtUnixSubprocess.class.st b/repository/BioTools/BioGtUnixSubprocess.class.st index 88fba17c..b52ef845 100644 --- a/repository/BioTools/BioGtUnixSubprocess.class.st +++ b/repository/BioTools/BioGtUnixSubprocess.class.st @@ -4,8 +4,9 @@ Execution engine backed by GToolkit subprocess API for running shell commands an Class { #name : 'BioGtUnixSubprocess', #superclass : 'BioExecutionEngine', - #category : 'BioTools', - #package : 'BioTools' + #category : 'BioTools-Adapters', + #package : 'BioTools', + #tag : 'Adapters' } { #category : 'executing' } diff --git a/repository/BioTools/BioSequence.class.st b/repository/BioTools/BioSequence.class.st index 97640b6e..ac31c189 100644 --- a/repository/BioTools/BioSequence.class.st +++ b/repository/BioTools/BioSequence.class.st @@ -19,7 +19,8 @@ Class { 'name', 'sequenceRecord', 'gcContent', - 'gcSkew' + 'gcSkew', + 'coordinates' ], #category : 
'BioTools-Sequences', #package : 'BioTools', @@ -589,6 +590,16 @@ BioSequence >> contents [ ^ self sequence. ] +{ #category : 'as yet unclassified' } +BioSequence >> coordinates [ + ^ coordinates ifNil: [ #() ] +] + +{ #category : 'as yet unclassified' } +BioSequence >> coordinates: aCollection [ + coordinates := aCollection +] + { #category : 'copying' } BioSequence >> copyFrom: startInteger [ " Answer a copy of the receiver sliced from startInteger position up to the last position.