From f81c723996e75271e99ee6a79db1a01d576fbdb7 Mon Sep 17 00:00:00 2001 From: pi-dal Date: Fri, 29 May 2026 11:25:49 +0800 Subject: [PATCH] feat(pubmed): add workflow presets and richer article metadata Expand the built-in PubMed adapter from basic retrieval into a more useful research workflow surface. Why: - The existing adapter already covered the core retrieval chain, but it still required agents to handcraft common high-frequency PubMed queries. - Single-article output was also still too thin for downstream summarization and evidence triage. - The goal of this change is to improve agent-facing research ergonomics without introducing a separate duplicate plugin. What changed: - add `pubmed mesh <term>` for MeSH-term driven search - add `pubmed journal <journal>` for journal-scoped search - add `pubmed clinical-trial <query>` as a preset over Clinical Trial + humans filters - add `pubmed review <query>` as a preset over Review article filtering - reshape `pubmed article <pmid>` into a single structured record instead of field/value rows - extend article detail output with `affiliations` and `grants` - update PubMed adapter docs and adapter index - regenerate `cli-manifest.json` Verification: - npm test -- clis/pubmed/pubmed.test.js - npm run build-manifest - npm run typecheck - npm run check:silent-column-drop - npm run check:typed-error-lint --- cli-manifest.json | 276 +++++++++++++++++++++++++++++++- clis/pubmed/article.js | 35 ++-- clis/pubmed/clinical-trial.js | 59 +++++++ clis/pubmed/journal.js | 59 +++++++ clis/pubmed/mesh.js | 47 ++++++ clis/pubmed/pubmed.test.js | 200 +++++++++++++++++++++-- clis/pubmed/review.js | 58 +++++++ clis/pubmed/utils.js | 9 ++ docs/adapters/browser/pubmed.md | 24 ++- docs/adapters/index.md | 2 +- 10 files changed, 740 insertions(+), 29 deletions(-) create mode 100644 clis/pubmed/clinical-trial.js create mode 100644 clis/pubmed/journal.js create mode 100644 clis/pubmed/mesh.js create mode 100644 clis/pubmed/review.js diff --git a/cli-manifest.json 
b/cli-manifest.json index 944958737..63aa53987 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -20595,9 +20595,24 @@ } ], "columns": [ - "field", - "value" + "pmid", + "title", + "authors", + "journal", + "year", + "date", + "article_type", + "language", + "doi", + "pmc", + "affiliations", + "grants", + "mesh_terms", + "keywords", + "abstract", + "url" ], + "defaultFormat": "plain", "type": "js", "modulePath": "pubmed/article.js", "sourceFile": "pubmed/article.js" @@ -20732,6 +20747,194 @@ "modulePath": "pubmed/citations.js", "sourceFile": "pubmed/citations.js" }, + { + "site": "pubmed", + "name": "clinical-trial", + "description": "Search PubMed clinical trials with a trial-study preset", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "query", + "type": "str", + "required": true, + "positional": true, + "help": "Clinical topic query, e.g. \"breast cancer\"" + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + }, + { + "name": "year-from", + "type": "int", + "required": false, + "help": "Filter publication year from" + }, + { + "name": "year-to", + "type": "int", + "required": false, + "help": "Filter publication year to" + }, + { + "name": "free-full-text", + "type": "boolean", + "default": false, + "required": false, + "help": "Only include free full text articles" + }, + { + "name": "sort", + "type": "str", + "default": "date", + "required": false, + "help": "Sort by date or relevance", + "choices": [ + "date", + "relevance" + ] + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/clinical-trial.js", + "sourceFile": "pubmed/clinical-trial.js" + }, + { + "site": "pubmed", + "name": "journal", + "description": "Search PubMed articles by journal name", + "access": "read", + "domain": 
"pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "journal", + "type": "str", + "required": true, + "positional": true, + "help": "Journal name, e.g. \"Nature\" or \"The Lancet\"" + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + }, + { + "name": "year-from", + "type": "int", + "required": false, + "help": "Filter publication year from" + }, + { + "name": "year-to", + "type": "int", + "required": false, + "help": "Filter publication year to" + }, + { + "name": "sort", + "type": "str", + "default": "relevance", + "required": false, + "help": "Sort by relevance or date", + "choices": [ + "relevance", + "date" + ] + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/journal.js", + "sourceFile": "pubmed/journal.js" + }, + { + "site": "pubmed", + "name": "mesh", + "description": "Search PubMed articles by MeSH term", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "term", + "type": "str", + "required": true, + "positional": true, + "help": "MeSH term, e.g. 
\"Neoplasms\" or \"Machine Learning\"" + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + }, + { + "name": "major", + "type": "boolean", + "default": false, + "required": false, + "help": "Only include articles where this is a major MeSH topic" + }, + { + "name": "sort", + "type": "str", + "default": "relevance", + "required": false, + "help": "Sort by relevance or date", + "choices": [ + "relevance", + "date" + ] + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/mesh.js", + "sourceFile": "pubmed/mesh.js" + }, { "site": "pubmed", "name": "related", @@ -20779,6 +20982,75 @@ "modulePath": "pubmed/related.js", "sourceFile": "pubmed/related.js" }, + { + "site": "pubmed", + "name": "review", + "description": "Search PubMed review articles with a review preset", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "query", + "type": "str", + "required": true, + "positional": true, + "help": "Review topic query, e.g. 
\"immunotherapy\"" + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + }, + { + "name": "year-from", + "type": "int", + "required": false, + "help": "Filter publication year from" + }, + { + "name": "year-to", + "type": "int", + "required": false, + "help": "Filter publication year to" + }, + { + "name": "has-abstract", + "type": "boolean", + "default": false, + "required": false, + "help": "Only include articles with abstracts" + }, + { + "name": "sort", + "type": "str", + "default": "date", + "required": false, + "help": "Sort by date or relevance", + "choices": [ + "date", + "relevance" + ] + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/review.js", + "sourceFile": "pubmed/review.js" + }, { "site": "pubmed", "name": "search", diff --git a/clis/pubmed/article.js b/clis/pubmed/article.js index 9dd74b50a..760c1041b 100644 --- a/clis/pubmed/article.js +++ b/clis/pubmed/article.js @@ -11,11 +11,12 @@ cli({ domain: 'pubmed.ncbi.nlm.nih.gov', strategy: Strategy.PUBLIC, browser: false, + defaultFormat: 'plain', args: [ { name: 'pmid', positional: true, required: true, help: 'PubMed ID, e.g. 37780221' }, { name: 'full-abstract', type: 'boolean', default: false, help: 'Do not truncate the abstract in table output' }, ], - columns: ['field', 'value'], + columns: ['pmid', 'title', 'authors', 'journal', 'year', 'date', 'article_type', 'language', 'doi', 'pmc', 'affiliations', 'grants', 'mesh_terms', 'keywords', 'abstract', 'url'], func: async (args) => { const pmid = requirePmid(args.pmid); const xml = await eutilsFetch('efetch', { @@ -31,20 +32,24 @@ cli({ } const abstract = args['full-abstract'] ? 
article.abstract : truncateText(article.abstract, 500); return [ - { field: 'PMID', value: article.pmid }, - { field: 'Title', value: article.title }, - { field: 'Authors', value: article.authors.join(', ') }, - { field: 'Journal', value: article.journal }, - { field: 'Year', value: article.year }, - { field: 'Date', value: article.date }, - { field: 'Article Type', value: article.article_type }, - { field: 'Language', value: article.language }, - { field: 'DOI', value: article.doi || null }, - { field: 'PMC ID', value: article.pmc || null }, - { field: 'MeSH Terms', value: article.mesh_terms || null }, - { field: 'Keywords', value: article.keywords || null }, - { field: 'Abstract', value: abstract || null }, - { field: 'URL', value: article.url }, + { + pmid: article.pmid, + title: article.title, + authors: article.authors.join(', '), + journal: article.journal, + year: article.year, + date: article.date || null, + article_type: article.article_type, + language: article.language || null, + doi: article.doi || null, + pmc: article.pmc || null, + affiliations: article.affiliations || null, + grants: article.grants || null, + mesh_terms: article.mesh_terms || null, + keywords: article.keywords || null, + abstract: abstract || null, + url: article.url, + }, ]; }, }); diff --git a/clis/pubmed/clinical-trial.js b/clis/pubmed/clinical-trial.js new file mode 100644 index 000000000..64582f8ed --- /dev/null +++ b/clis/pubmed/clinical-trial.js @@ -0,0 +1,59 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + SEARCH_COLUMNS, + buildSearchQuery, + eutilsFetch, + fetchSummaryRows, + requireBoundedInt, + requireChoice, + requireText, + requireYear, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'clinical-trial', + access: 'read', + description: 'Search PubMed clinical trials with a trial-study preset', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: 
Strategy.PUBLIC, + browser: false, + args: [ + { name: 'query', positional: true, required: true, help: 'Clinical topic query, e.g. "breast cancer"' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + { name: 'year-from', type: 'int', help: 'Filter publication year from' }, + { name: 'year-to', type: 'int', help: 'Filter publication year to' }, + { name: 'free-full-text', type: 'boolean', default: false, help: 'Only include free full text articles' }, + { name: 'sort', default: 'date', choices: ['date', 'relevance'], help: 'Sort by date or relevance' }, + ], + columns: SEARCH_COLUMNS, + func: async (args) => { + const query = requireText(args.query, 'query'); + const limit = requireBoundedInt(args.limit, 20, 100); + const yearFrom = requireYear(args['year-from'], 'year-from'); + const yearTo = requireYear(args['year-to'], 'year-to'); + const sort = requireChoice(args.sort, ['date', 'relevance'], 'sort', 'date'); + const searchQuery = buildSearchQuery(query, { + yearFrom, + yearTo, + articleType: 'Clinical Trial', + hasFullText: args['free-full-text'], + humanOnly: true, + }); + const esearch = await eutilsFetch('esearch', { + term: searchQuery, + retmax: limit, + usehistory: 'y', + sort: sort === 'date' ? 
'pub_date' : '', + }, { label: 'pubmed clinical-trial' }); + const pmids = esearch?.esearchresult?.idlist; + if (!Array.isArray(pmids)) { + throw new CommandExecutionError('pubmed clinical-trial did not return an id list', 'PubMed ESearch response shape may have changed.'); + } + if (pmids.length === 0) { + throw new EmptyResultError('pubmed clinical-trial', `No clinical trial articles matched "${query}".`); + } + return fetchSummaryRows(pmids, 'pubmed clinical-trial summary'); + }, +}); diff --git a/clis/pubmed/journal.js b/clis/pubmed/journal.js new file mode 100644 index 000000000..21efdb867 --- /dev/null +++ b/clis/pubmed/journal.js @@ -0,0 +1,59 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { ArgumentError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + SEARCH_COLUMNS, + eutilsFetch, + fetchSummaryRows, + requireBoundedInt, + requireChoice, + requireText, + requireYear, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'journal', + access: 'read', + description: 'Search PubMed articles by journal name', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'journal', positional: true, required: true, help: 'Journal name, e.g. 
"Nature" or "The Lancet"' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + { name: 'year-from', type: 'int', help: 'Filter publication year from' }, + { name: 'year-to', type: 'int', help: 'Filter publication year to' }, + { name: 'sort', default: 'relevance', choices: ['relevance', 'date'], help: 'Sort by relevance or date' }, + ], + columns: SEARCH_COLUMNS, + func: async (args) => { + const journal = requireText(args.journal, 'journal'); + const limit = requireBoundedInt(args.limit, 20, 100); + const yearFrom = requireYear(args['year-from'], 'year-from'); + const yearTo = requireYear(args['year-to'], 'year-to'); + const sort = requireChoice(args.sort, ['relevance', 'date'], 'sort', 'relevance'); + const terms = [`${journal}[Journal]`]; + if (yearFrom || yearTo) { + const from = yearFrom || 1800; + const to = yearTo || new Date().getFullYear(); + if (from > to) { + throw new ArgumentError('pubmed year-from must be <= year-to'); + } + terms.push(`${from}:${to}[PDAT]`); + } + const esearch = await eutilsFetch('esearch', { + term: terms.join(' AND '), + retmax: limit, + usehistory: 'y', + sort: sort === 'date' ? 
'pub_date' : '', + }, { label: 'pubmed journal' }); + const pmids = esearch?.esearchresult?.idlist; + if (!Array.isArray(pmids)) { + throw new CommandExecutionError('pubmed journal did not return an id list', 'PubMed ESearch response shape may have changed.'); + } + if (pmids.length === 0) { + throw new EmptyResultError('pubmed journal', `No articles found for journal "${journal}".`); + } + return fetchSummaryRows(pmids, 'pubmed journal summary'); + }, +}); diff --git a/clis/pubmed/mesh.js b/clis/pubmed/mesh.js new file mode 100644 index 000000000..08c07030a --- /dev/null +++ b/clis/pubmed/mesh.js @@ -0,0 +1,47 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + SEARCH_COLUMNS, + eutilsFetch, + fetchSummaryRows, + requireBoundedInt, + requireChoice, + requireText, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'mesh', + access: 'read', + description: 'Search PubMed articles by MeSH term', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'term', positional: true, required: true, help: 'MeSH term, e.g. "Neoplasms" or "Machine Learning"' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + { name: 'major', type: 'boolean', default: false, help: 'Only include articles where this is a major MeSH topic' }, + { name: 'sort', default: 'relevance', choices: ['relevance', 'date'], help: 'Sort by relevance or date' }, + ], + columns: SEARCH_COLUMNS, + func: async (args) => { + const term = requireText(args.term, 'term'); + const limit = requireBoundedInt(args.limit, 20, 100); + const sort = requireChoice(args.sort, ['relevance', 'date'], 'sort', 'relevance'); + const tag = args.major ? 'Majr' : 'MeSH Terms'; + const esearch = await eutilsFetch('esearch', { + term: `${term}[${tag}]`, + retmax: limit, + usehistory: 'y', + sort: sort === 'date' ? 
'pub_date' : '', + }, { label: 'pubmed mesh' }); + const pmids = esearch?.esearchresult?.idlist; + if (!Array.isArray(pmids)) { + throw new CommandExecutionError('pubmed mesh did not return an id list', 'PubMed ESearch response shape may have changed.'); + } + if (pmids.length === 0) { + throw new EmptyResultError('pubmed mesh', `No articles found for MeSH term "${term}".`); + } + return fetchSummaryRows(pmids, 'pubmed mesh summary'); + }, +}); diff --git a/clis/pubmed/pubmed.test.js b/clis/pubmed/pubmed.test.js index bd21ae0e5..835918fd8 100644 --- a/clis/pubmed/pubmed.test.js +++ b/clis/pubmed/pubmed.test.js @@ -16,6 +16,10 @@ import './article.js'; import './author.js'; import './citations.js'; import './related.js'; +import './clinical-trial.js'; +import './review.js'; +import './mesh.js'; +import './journal.js'; const SUMMARY_RESULT = { result: { @@ -52,18 +56,42 @@ const ARTICLE_XML = ` Detailed PubMed article & title. Background text.Conclusion text. - AliceExample - BobB + AliceExampleDepartment of Oncology, SYSU. + BobBState Key Laboratory of Oncology. eng Review Neoplasms machine learning + + 81972898National Natural Science Foundation of China + 2024A1515010001Guangdong Basic and Applied Basic Research Foundation + 10.1000/detailPMC123 `; +const LONG_ARTICLE_XML = ` + + +
+ + Clinical Trials Journal + 2025Feb14 + + Long abstract trial paper. + ${'Trial outcome sentence. '.repeat(40)} + + ChanRobin + + eng + Clinical Trial +
+
+ 10.1000/trial +
`; + function jsonResponse(body, ok = true, status = 200) { return { ok, @@ -87,16 +115,19 @@ afterEach(() => { }); describe('pubmed adapter registration', () => { - it('registers five public read commands with expected listing columns', () => { + it('registers nine public read commands with expected listing columns', () => { const registry = getRegistry(); - for (const name of ['search', 'article', 'author', 'citations', 'related']) { + for (const name of ['search', 'article', 'author', 'citations', 'related', 'clinical-trial', 'review', 'mesh', 'journal']) { const command = registry.get(`pubmed/${name}`); expect(command).toBeDefined(); expect(command.strategy).toBe('public'); expect(command.browser).toBe(false); - expect(command.access).toBe('read'); } expect(registry.get('pubmed/search').columns).toEqual(SEARCH_COLUMNS); + expect(registry.get('pubmed/clinical-trial').columns).toEqual(SEARCH_COLUMNS); + expect(registry.get('pubmed/review').columns).toEqual(SEARCH_COLUMNS); + expect(registry.get('pubmed/mesh').columns).toEqual(SEARCH_COLUMNS); + expect(registry.get('pubmed/journal').columns).toEqual(SEARCH_COLUMNS); expect(registry.get('pubmed/author').columns).toEqual(LINK_COLUMNS); expect(registry.get('pubmed/citations').columns).toEqual(LINK_COLUMNS); expect(registry.get('pubmed/related').columns).toEqual(RELATED_COLUMNS); @@ -200,13 +231,93 @@ describe('pubmed search command', () => { }); }); +describe('pubmed mesh command', () => { + it('searches by MeSH term with optional major-topic filter', async () => { + const fetchMock = vi.fn() + .mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: ['123'] } })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } })); + vi.stubGlobal('fetch', fetchMock); + const rows = await getRegistry().get('pubmed/mesh').func({ term: 'Neoplasms', major: true, limit: 1, sort: 'date' }); + expect(rows).toHaveLength(1); + expect(rows[0].pmid).toBe('123'); + const url = 
fetchMock.mock.calls[0][0]; + expect(url).toContain('Neoplasms%5BMajr%5D'); + expect(url).toContain('sort=pub_date'); + }); + + it('rejects invalid mesh args and empty results', async () => { + const command = getRegistry().get('pubmed/mesh'); + await expect(command.func({ term: '' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ term: 'Neoplasms', limit: 101 })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ term: 'Neoplasms', sort: 'bad' })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: [] } }))); + await expect(command.func({ term: 'Neoplasms' })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); + +describe('pubmed journal command', () => { + it('searches by journal with optional year range and date sort', async () => { + const fetchMock = vi.fn() + .mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: ['123'] } })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } })); + vi.stubGlobal('fetch', fetchMock); + const rows = await getRegistry().get('pubmed/journal').func({ + journal: 'Nature', + 'year-from': 2020, + 'year-to': 2024, + sort: 'date', + limit: 1, + }); + expect(rows).toHaveLength(1); + expect(rows[0].pmid).toBe('123'); + const url = fetchMock.mock.calls[0][0]; + expect(url).toContain('Nature%5BJournal%5D'); + expect(url).toContain('2020%3A2024%5BPDAT%5D'); + expect(url).toContain('sort=pub_date'); + }); + + it('rejects invalid journal args and empty results', async () => { + const command = getRegistry().get('pubmed/journal'); + await expect(command.func({ journal: '' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ journal: 'Nature', limit: 101 })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ journal: 'Nature', sort: 'bad' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ journal: 'Nature', 'year-from': 
2025, 'year-to': 2020 })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: [] } }))); + await expect(command.func({ journal: 'Nature' })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); + describe('pubmed article command', () => { - it('returns field/value rows for a valid article', async () => { + it('returns a single structured row for a valid article', async () => { vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(xmlResponse(ARTICLE_XML))); const rows = await getRegistry().get('pubmed/article').func({ pmid: '123' }); - expect(rows).toContainEqual({ field: 'PMID', value: '123' }); - expect(rows).toContainEqual({ field: 'DOI', value: '10.1000/detail' }); - expect(rows.find(row => row.field === 'Abstract').value).toContain('Background text'); + expect(rows).toEqual([expect.objectContaining({ + pmid: '123', + title: 'Detailed PubMed article & title.', + authors: 'Alice Example, Bob B', + journal: 'Journal of Tests', + year: '2024', + article_type: 'Review', + doi: '10.1000/detail', + pmc: 'PMC123', + affiliations: 'Department of Oncology, SYSU. | State Key Laboratory of Oncology.', + grants: '81972898: National Natural Science Foundation of China | 2024A1515010001: Guangdong Basic and Applied Basic Research Foundation', + abstract: 'Background text. 
Conclusion text.', + url: 'https://pubmed.ncbi.nlm.nih.gov/123/', + })]); + }); + + it('truncates long abstracts by default and expands them with --full-abstract', async () => { + const command = getRegistry().get('pubmed/article'); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(xmlResponse(LONG_ARTICLE_XML))); + const truncatedRows = await command.func({ pmid: '555' }); + expect(truncatedRows[0].abstract.length).toBeLessThan(520); + expect(truncatedRows[0].abstract.endsWith('...')).toBe(true); + + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(xmlResponse(LONG_ARTICLE_XML))); + const fullRows = await command.func({ pmid: '555', 'full-abstract': true }); + expect(fullRows[0].abstract.length).toBeGreaterThan(800); + expect(fullRows[0].abstract.endsWith('...')).toBe(false); }); it('rejects invalid or missing articles with typed errors', async () => { @@ -217,6 +328,77 @@ describe('pubmed article command', () => { }); }); +describe('pubmed clinical-trial command', () => { + it('searches with the clinical-trial preset and optional free-full-text filter', async () => { + const fetchMock = vi.fn() + .mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: ['123'] } })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } })); + vi.stubGlobal('fetch', fetchMock); + const rows = await getRegistry().get('pubmed/clinical-trial').func({ + query: 'breast cancer', + 'year-from': 2020, + 'year-to': 2024, + 'free-full-text': true, + sort: 'date', + limit: 1, + }); + expect(rows).toHaveLength(1); + expect(rows[0].pmid).toBe('123'); + const url = fetchMock.mock.calls[0][0]; + expect(url).toContain('breast+cancer'); + expect(url).toContain('Clinical+Trial%5BPT%5D'); + expect(url).toContain('humans%5Bmesh%5D'); + expect(url).toContain('free+full+text%5Bsb%5D'); + expect(url).toContain('2020%3A2024%5BPDAT%5D'); + expect(url).toContain('sort=pub_date'); + }); + + it('rejects invalid clinical-trial args and empty results', async () => { 
+ const command = getRegistry().get('pubmed/clinical-trial'); + await expect(command.func({ query: '' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'breast cancer', limit: 101 })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'breast cancer', sort: 'bad' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'breast cancer', 'year-from': 2025, 'year-to': 2020 })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: [] } }))); + await expect(command.func({ query: 'breast cancer' })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); + +describe('pubmed review command', () => { + it('searches with the review preset and optional abstract filter', async () => { + const fetchMock = vi.fn() + .mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: ['123'] } })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } })); + vi.stubGlobal('fetch', fetchMock); + const rows = await getRegistry().get('pubmed/review').func({ + query: 'immunotherapy', + 'year-from': 2021, + 'year-to': 2024, + 'has-abstract': true, + sort: 'date', + limit: 1, + }); + expect(rows).toHaveLength(1); + expect(rows[0].pmid).toBe('123'); + const url = fetchMock.mock.calls[0][0]; + expect(url).toContain('immunotherapy'); + expect(url).toContain('Review%5BPT%5D'); + expect(url).toContain('hasabstract%5Btext%5D'); + expect(url).toContain('2021%3A2024%5BPDAT%5D'); + expect(url).toContain('sort=pub_date'); + }); + + it('rejects invalid review args and empty results', async () => { + const command = getRegistry().get('pubmed/review'); + await expect(command.func({ query: '' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'immunotherapy', limit: 101 })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'immunotherapy', sort: 'bad' 
})).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'immunotherapy', 'year-from': 2025, 'year-to': 2020 })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: [] } }))); + await expect(command.func({ query: 'immunotherapy' })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); + describe('pubmed author command', () => { it('searches author position and affiliation filters', async () => { const fetchMock = vi.fn() diff --git a/clis/pubmed/review.js b/clis/pubmed/review.js new file mode 100644 index 000000000..038eac0ef --- /dev/null +++ b/clis/pubmed/review.js @@ -0,0 +1,58 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + SEARCH_COLUMNS, + buildSearchQuery, + eutilsFetch, + fetchSummaryRows, + requireBoundedInt, + requireChoice, + requireText, + requireYear, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'review', + access: 'read', + description: 'Search PubMed review articles with a review preset', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'query', positional: true, required: true, help: 'Review topic query, e.g. 
"immunotherapy"' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + { name: 'year-from', type: 'int', help: 'Filter publication year from' }, + { name: 'year-to', type: 'int', help: 'Filter publication year to' }, + { name: 'has-abstract', type: 'boolean', default: false, help: 'Only include articles with abstracts' }, + { name: 'sort', default: 'date', choices: ['date', 'relevance'], help: 'Sort by date or relevance' }, + ], + columns: SEARCH_COLUMNS, + func: async (args) => { + const query = requireText(args.query, 'query'); + const limit = requireBoundedInt(args.limit, 20, 100); + const yearFrom = requireYear(args['year-from'], 'year-from'); + const yearTo = requireYear(args['year-to'], 'year-to'); + const sort = requireChoice(args.sort, ['date', 'relevance'], 'sort', 'date'); + const searchQuery = buildSearchQuery(query, { + yearFrom, + yearTo, + articleType: 'Review', + hasAbstract: args['has-abstract'], + }); + const esearch = await eutilsFetch('esearch', { + term: searchQuery, + retmax: limit, + usehistory: 'y', + sort: sort === 'date' ? 
'pub_date' : '', + }, { label: 'pubmed review' }); + const pmids = esearch?.esearchresult?.idlist; + if (!Array.isArray(pmids)) { + throw new CommandExecutionError('pubmed review did not return an id list', 'PubMed ESearch response shape may have changed.'); + } + if (pmids.length === 0) { + throw new EmptyResultError('pubmed review', `No review articles matched "${query}".`); + } + return fetchSummaryRows(pmids, 'pubmed review summary'); + }, +}); diff --git a/clis/pubmed/utils.js b/clis/pubmed/utils.js index 4b1c849b1..28ff73b0c 100644 --- a/clis/pubmed/utils.js +++ b/clis/pubmed/utils.js @@ -283,6 +283,13 @@ export function parseArticleXml(xml, pmid) { const pubTypes = extractAll(articleBlock, 'PublicationType'); const meshTerms = extractAll(text, 'DescriptorName'); const keywords = extractAll(text, 'Keyword'); + const affiliations = extractAll(text, 'Affiliation'); + const grantBlocks = [...text.matchAll(/]*>([\s\S]*?)<\/Grant>/gi)].map(match => match[1]); + const grants = grantBlocks.map(block => { + const grantId = extractFirst(block, 'GrantID'); + const agency = extractFirst(block, 'Agency'); + return [grantId, agency].filter(Boolean).join(': '); + }).filter(Boolean); const doi = text.match(/]*IdType="doi"[^>]*>([\s\S]*?)<\/ArticleId>/i)?.[1] || ''; const pmc = text.match(/]*IdType="pmc"[^>]*>([\s\S]*?)<\/ArticleId>/i)?.[1] || ''; return { @@ -297,6 +304,8 @@ export function parseArticleXml(xml, pmid) { pmc: cleanText(pmc), article_type: articleTypeFromList(pubTypes), language: extractFirst(articleBlock, 'Language'), + affiliations: affiliations.slice(0, 10).join(' | '), + grants: grants.slice(0, 10).join(' | '), mesh_terms: meshTerms.slice(0, 10).join(', '), keywords: keywords.slice(0, 10).join(', '), url: buildPubMedUrl(pmid), diff --git a/docs/adapters/browser/pubmed.md b/docs/adapters/browser/pubmed.md index a85dd6360..fd26ea06b 100644 --- a/docs/adapters/browser/pubmed.md +++ b/docs/adapters/browser/pubmed.md @@ -11,6 +11,10 @@ | `opencli pubmed author` 
| Search articles by author and affiliation | | `opencli pubmed citations` | List cited-by or reference relationships | | `opencli pubmed related` | Find related PubMed articles | +| `opencli pubmed clinical-trial` | Search PubMed clinical trials with a preset query profile | +| `opencli pubmed review` | Search PubMed review articles with a preset query profile | +| `opencli pubmed mesh` | Search articles by MeSH term | +| `opencli pubmed journal` | Search articles by journal name | ## Usage Examples @@ -30,13 +34,29 @@ opencli pubmed citations 37780221 --direction references --limit 20 # Related articles with scores opencli pubmed related 37780221 --score + +# Clinical trial preset +opencli pubmed clinical-trial "breast cancer" --year-from 2020 --free-full-text --limit 10 + +# Review preset +opencli pubmed review "immunotherapy" --year-from 2021 --has-abstract --limit 10 + +# Search by MeSH term +opencli pubmed mesh "Neoplasms" --major --limit 10 + +# Search by journal +opencli pubmed journal "Nature" --year-from 2020 --sort date --limit 10 ``` ## Output Listing commands return `pmid`, `title`, `authors`, `journal`, `year`, `article_type`, `doi`, and `url` where available. The `pmid` column is the stable identifier for `opencli pubmed article <pmid>`. -`article` returns field/value rows for title, authors, journal, year/date, DOI/PMC ID, MeSH terms, keywords, abstract, and PubMed URL. +`article` now returns a single structured row: `pmid`, `title`, `authors`, `journal`, `year`, `date`, `article_type`, `language`, `doi`, `pmc`, `affiliations`, `grants`, `mesh_terms`, `keywords`, `abstract`, and `url`. By default the abstract is truncated for readability; pass `--full-abstract` when you need the complete abstract text. + +`clinical-trial` is a preset over the same E-utilities search path. It always adds the PubMed `Clinical Trial[PT]` and `humans[mesh]` filters, and optionally adds `free full text[sb]`. + +`review` is the parallel preset for literature overviews. 
It always adds the PubMed `Review[PT]` filter, and can optionally require abstracts with `--has-abstract`. ## Prerequisites @@ -51,6 +71,6 @@ export NCBI_EMAIL=you@example.com ## Failure Semantics -- Invalid `pmid`, `limit`, year, `sort`, `position`, or `direction` values fail before network access with `ArgumentError`. +- Invalid `pmid`, `limit`, year, `sort`, `position`, `direction`, MeSH `term`, `journal`, `clinical-trial` query, or `review` query values fail before network access with `ArgumentError`. - HTTP errors, fetch failures, invalid JSON, E-utilities error envelopes, and partial summary payloads fail with `CommandExecutionError`. - Valid no-result searches and missing relationships fail with `EmptyResultError`. diff --git a/docs/adapters/index.md b/docs/adapters/index.md index a0057329e..1f2155c65 100644 --- a/docs/adapters/index.md +++ b/docs/adapters/index.md @@ -104,7 +104,7 @@ Run `opencli list` for the live registry. | **[yahoo-finance](./browser/yahoo-finance.md)** | `quote` | 🌐 Public | | **[arxiv](./browser/arxiv.md)** | `search` `paper` | 🌐 Public | | **[dblp](./browser/dblp.md)** | `search` `author` `paper` `venue` | 🌐 Public | -| **[pubmed](./browser/pubmed.md)** | `search` `article` `author` `citations` `related` | 🌐 Public | +| **[pubmed](./browser/pubmed.md)** | `search` `article` `author` `citations` `related` `clinical-trial` `review` `mesh` `journal` | 🌐 Public | | **[openreview](./browser/openreview.md)** | `search` `venue` `author` `paper` `reviews` | 🌐 Public | | **[paperreview](./browser/paperreview.md)** | `submit` `review` `feedback` | 🌐 Public | | **[barchart](./browser/barchart.md)** | `quote` `options` `greeks` `flow` | 🌐 Public |