Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Changelog

## Version 0.0.1
## Version 0.0.1 - 0.0.2

- Initial release of the package with class structure and basic functionality.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# TxDb

This package provides a Python interface to access and manipulate genome annotations, implemented in the Bioconductor [GenomicFeatures](https://bioconductor.org/packages/GenomicFeatures) package. It allows users to interact with `TxDb` SQLite databases to extract genomic features such as transcripts, exons, CDS, and promoters as `GenomicRanges` objects. It also includes a registry system to easily download and cache standard TxDb databases.
This package provides a Python interface for accessing and manipulating genome annotations, as implemented in the Bioconductor [GenomicFeatures](https://bioconductor.org/packages/GenomicFeatures) package. It allows users to interact with `TxDb` SQLite databases to extract genomic features such as transcripts, exons, CDS, and promoters as [GenomicRanges](https://github.com/biocpy/genomicranges) objects. It also includes a registry system to easily download and cache standard TxDb annotation files.

## Install

Expand All @@ -17,7 +17,7 @@ pip install txdb

### Using TxDbRegistry

The TxDbRegistry provides easy access to hosted TxDb databases in AnnotationHub.
The `TxDbRegistry` provides easy access to TxDb databases hosted in [AnnotationHub](https://bioconductor.org/packages/release/bioc/html/AnnotationHub.html).

```python
from txdb import TxDbRegistry
Expand All @@ -30,7 +30,7 @@ print(registry.list_txdb())

# Load a specific database (downloads and caches it automatically)
# Example: hg38 knownGene
txdb = registry.load_db("TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite")
txdb = registry.load_db("TxDb.Hsapiens.UCSC.hg38.knownGene")

# Access features
transcripts = txdb.transcripts()
Expand Down
90 changes: 45 additions & 45 deletions src/txdb/_ahub.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,183 +29,183 @@
__license__ = "MIT"

TXDB_CONFIG = {
"TxDb.Athaliana.BioMart.plantsmart22.sqlite": {
"TxDb.Athaliana.BioMart.plantsmart22": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart22.sqlite",
},
"TxDb.Athaliana.BioMart.plantsmart25.sqlite": {
"TxDb.Athaliana.BioMart.plantsmart25": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart25.sqlite",
},
"TxDb.Athaliana.BioMart.plantsmart28.sqlite": {
"TxDb.Athaliana.BioMart.plantsmart28": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart28.sqlite",
},
"TxDb.Btaurus.UCSC.bosTau8.refGene.sqlite": {
"TxDb.Btaurus.UCSC.bosTau8.refGene": {
"release_date": "2020-10-20",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Btaurus.UCSC.bosTau8.refGene.sqlite",
},
"TxDb.Celegans.UCSC.ce11.refGene.sqlite": {
"TxDb.Celegans.UCSC.ce11.refGene": {
"release_date": "2019-05-01",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Celegans.UCSC.ce11.refGene.sqlite",
},
"TxDb.Celegans.UCSC.ce6.ensGene.sqlite": {
"TxDb.Celegans.UCSC.ce6.ensGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Celegans.UCSC.ce6.ensGene.sqlite",
},
"TxDb.Cfamiliaris.UCSC.canFam3.refGene.sqlite": {
"TxDb.Cfamiliaris.UCSC.canFam3.refGene": {
"release_date": "2020-10-20",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Cfamiliaris.UCSC.canFam3.refGene.sqlite",
},
"TxDb.Dmelanogaster.UCSC.dm3.ensGene.sqlite": {
"TxDb.Dmelanogaster.UCSC.dm3.ensGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Dmelanogaster.UCSC.dm3.ensGene.sqlite",
},
"TxDb.Dmelanogaster.UCSC.dm6.ensGene.sqlite": {
"TxDb.Dmelanogaster.UCSC.dm6.ensGene": {
"release_date": "2020-10-20",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Dmelanogaster.UCSC.dm6.ensGene.sqlite",
},
"TxDb.Drerio.UCSC.danRer10.refGene.sqlite": {
"TxDb.Drerio.UCSC.danRer10.refGene": {
"release_date": "2019-05-01",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Drerio.UCSC.danRer10.refGene.sqlite",
},
"TxDb.Ggallus.UCSC.galGal4.refGene.sqlite": {
"TxDb.Ggallus.UCSC.galGal4.refGene": {
"release_date": "2020-10-20",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Ggallus.UCSC.galGal4.refGene.sqlite",
},
"TxDb.Hsapiens.BioMart.igis.sqlite": {
"TxDb.Hsapiens.BioMart.igis": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.BioMart.igis.sqlite",
},
"TxDb.Hsapiens.UCSC.hg18.knownGene.sqlite": {
"TxDb.Hsapiens.UCSC.hg18.knownGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.UCSC.hg18.knownGene.sqlite",
},
"TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite": {
"TxDb.Hsapiens.UCSC.hg19.knownGene": {
"release_date": "2025-10-29",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.22/TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite",
},
"TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts.sqlite": {
"TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts.sqlite",
},
"TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite": {
"TxDb.Hsapiens.UCSC.hg38.knownGene": {
"release_date": "2025-10-29",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.22/TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite",
},
"TxDb.Hsapiens.UCSC.hg38.refGene.sqlite": {
"TxDb.Hsapiens.UCSC.hg38.refGene": {
"release_date": "2024-04-02",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.19/TxDb.Hsapiens.UCSC.hg38.refGene.sqlite",
},
"TxDb.Mmulatta.UCSC.rheMac3.refGene.sqlite": {
"TxDb.Mmulatta.UCSC.rheMac3.refGene": {
"release_date": "2020-10-20",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Mmulatta.UCSC.rheMac3.refGene.sqlite",
},
"TxDb.Mmulatta.UCSC.rheMac8.refGene.sqlite": {
"TxDb.Mmulatta.UCSC.rheMac8.refGene": {
"release_date": "2020-10-20",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Mmulatta.UCSC.rheMac8.refGene.sqlite",
},
"TxDb.Mmulatta.UCSC.rheMac10.refGene.sqlite": {
"TxDb.Mmulatta.UCSC.rheMac10.refGene": {
"release_date": "2021-10-08",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Mmulatta.UCSC.rheMac10.refGene.sqlite",
},
"TxDb.Mmusculus.UCSC.mm10.ensGene.sqlite": {
"TxDb.Mmusculus.UCSC.mm10.ensGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Mmusculus.UCSC.mm10.ensGene.sqlite",
},
"TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite": {
"TxDb.Mmusculus.UCSC.mm10.knownGene": {
"release_date": "2019-05-01",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite",
},
"TxDb.Mmusculus.UCSC.mm39.refGene.sqlite": {
"TxDb.Mmusculus.UCSC.mm39.refGene": {
"release_date": "2024-04-02",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.19/TxDb.Mmusculus.UCSC.mm39.refGene.sqlite",
},
"TxDb.Mmusculus.UCSC.mm39.knownGene.sqlite": {
"TxDb.Mmusculus.UCSC.mm39.knownGene": {
"release_date": "2025-03-11",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.21/TxDb.Mmusculus.UCSC.mm39.knownGene.sqlite",
},
"TxDb.Mmusculus.UCSC.mm9.knownGene.sqlite": {
"TxDb.Mmusculus.UCSC.mm9.knownGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Mmusculus.UCSC.mm9.knownGene.sqlite",
},
"TxDb.Ptroglodytes.UCSC.panTro4.refGene.sqlite": {
"TxDb.Ptroglodytes.UCSC.panTro4.refGene": {
"release_date": "2020-04-27",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ptroglodytes.UCSC.panTro4.refGene.sqlite",
},
"TxDb.Ptroglodytes.UCSC.panTro5.refGene.sqlite": {
"TxDb.Ptroglodytes.UCSC.panTro5.refGene": {
"release_date": "2020-04-27",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ptroglodytes.UCSC.panTro5.refGene.sqlite",
},
"TxDb.Ptroglodytes.UCSC.panTro6.refGene.sqlite": {
"TxDb.Ptroglodytes.UCSC.panTro6.refGene": {
"release_date": "2019-10-29",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.10/TxDb.Ptroglodytes.UCSC.panTro6.refGene.sqlite",
},
"TxDb.Rnorvegicus.BioMart.igis.sqlite": {
"TxDb.Rnorvegicus.BioMart.igis": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Rnorvegicus.BioMart.igis.sqlite",
},
"TxDb.Rnorvegicus.UCSC.rn4.ensGene.sqlite": {
"TxDb.Rnorvegicus.UCSC.rn4.ensGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Rnorvegicus.UCSC.rn4.ensGene.sqlite",
},
"TxDb.Rnorvegicus.UCSC.rn5.refGene.sqlite": {
"TxDb.Rnorvegicus.UCSC.rn5.refGene": {
"release_date": "2020-04-27",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Rnorvegicus.UCSC.rn5.refGene.sqlite",
},
"TxDb.Rnorvegicus.UCSC.rn6.refGene.sqlite": {
"TxDb.Rnorvegicus.UCSC.rn6.refGene": {
"release_date": "2019-05-01",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Rnorvegicus.UCSC.rn6.refGene.sqlite",
},
"TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq.sqlite": {
"TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq": {
"release_date": "2020-10-20",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq.sqlite",
},
"TxDb.Rnorvegicus.UCSC.rn7.refGene.sqlite": {
"TxDb.Rnorvegicus.UCSC.rn7.refGene": {
"release_date": "2022-04-18",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.15/TxDb.Rnorvegicus.UCSC.rn7.refGene.sqlite",
},
"TxDb.Scerevisiae.UCSC.sacCer2.sgdGene.sqlite": {
"TxDb.Scerevisiae.UCSC.sacCer2.sgdGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Scerevisiae.UCSC.sacCer2.sgdGene.sqlite",
},
"TxDb.Scerevisiae.UCSC.sacCer3.sgdGene.sqlite": {
"TxDb.Scerevisiae.UCSC.sacCer3.sgdGene": {
"release_date": "2016-12-22",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Scerevisiae.UCSC.sacCer3.sgdGene.sqlite",
},
"TxDb.Sscrofa.UCSC.susScr3.refGene.sqlite": {
"TxDb.Sscrofa.UCSC.susScr3.refGene": {
"release_date": "2020-04-27",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Sscrofa.UCSC.susScr3.refGene.sqlite",
},
"TxDb.Sscrofa.UCSC.susScr11.refGene.sqlite": {
"TxDb.Sscrofa.UCSC.susScr11.refGene": {
"release_date": "2020-04-27",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Sscrofa.UCSC.susScr11.refGene.sqlite",
},
"TxDb.Ggallus.UCSC.galGal5.refGene.sqlite": {
"TxDb.Ggallus.UCSC.galGal5.refGene": {
"release_date": "2020-04-27",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ggallus.UCSC.galGal5.refGene.sqlite",
},
"TxDb.Ggallus.UCSC.galGal6.refGene.sqlite": {
"TxDb.Ggallus.UCSC.galGal6.refGene": {
"release_date": "2019-10-29",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.10/TxDb.Ggallus.UCSC.galGal6.refGene.sqlite",
},
"TxDb.Cfamiliaris.UCSC.canFam4.refGene.sqlite": {
"TxDb.Cfamiliaris.UCSC.canFam4.refGene": {
"release_date": "2021-10-08",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Cfamiliaris.UCSC.canFam4.refGene.sqlite",
},
"TxDb.Cfamiliaris.UCSC.canFam5.refGene.sqlite": {
"TxDb.Cfamiliaris.UCSC.canFam5.refGene": {
"release_date": "2021-10-08",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Cfamiliaris.UCSC.canFam5.refGene.sqlite",
},
"TxDb.Cfamiliaris.UCSC.canFam6.refGene.sqlite": {
"TxDb.Cfamiliaris.UCSC.canFam6.refGene": {
"release_date": "2023-04-06",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.17/TxDb.Cfamiliaris.UCSC.canFam6.refGene.sqlite",
},
"TxDb.Celegans.UCSC.ce11.ensGene.sqlite": {
"TxDb.Celegans.UCSC.ce11.ensGene": {
"release_date": "2022-04-18",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.15/TxDb.Celegans.UCSC.ce11.ensGene.sqlite",
},
"TxDb.Drerio.UCSC.danRer11.refGene.sqlite": {
"TxDb.Drerio.UCSC.danRer11.refGene": {
"release_date": "2019-05-01",
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Drerio.UCSC.danRer11.refGene.sqlite",
},
Expand Down
2 changes: 1 addition & 1 deletion tests/test_real.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def test_real_txdb_workflow(tmp_path):
registry = TxDbRegistry(cache_dir=tmp_path / "cache")
txdb_id = "TxDb.Celegans.UCSC.ce11.ensGene.sqlite"
txdb_id = "TxDb.Celegans.UCSC.ce11.ensGene"

assert txdb_id in registry.list_txdb()

Expand Down
4 changes: 2 additions & 2 deletions tests/test_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def registry(tmp_path):

def test_registry_init(registry):
assert isinstance(registry, TxDbRegistry)
assert "TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite" in registry.list_txdb()
assert "TxDb.Mmusculus.UCSC.mm10.knownGene" in registry.list_txdb()


# @patch("txdb.txdbregistry.BiocFileCache")
Expand All @@ -55,7 +55,7 @@ def test_registry_init(registry):
# registry._bfc = mock_bfc

# # Test load_db
# txdb = registry.load_db("TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite")
# txdb = registry.load_db("TxDb.Mmusculus.UCSC.mm10.knownGene")

# assert isinstance(txdb, TxDb)
# assert txdb.dbpath == mock_db_file
Expand Down