From 77fe8a83707add1d098733fa86304cd1ed5659f5 Mon Sep 17 00:00:00 2001 From: Dong Nguyen Date: Thu, 4 Sep 2025 11:18:19 +0700 Subject: [PATCH 1/3] v8.0.20 - Update dependencies --- package.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index 03f5e21..8cae001 100644 --- a/package.json +++ b/package.json @@ -33,16 +33,16 @@ "@mozilla/readability": "^0.6.0", "@ndaidong/bellajs": "^12.0.1", "cross-fetch": "^4.1.0", - "linkedom": "^0.18.10", - "sanitize-html": "2.16.0" + "linkedom": "^0.18.12", + "sanitize-html": "2.17.0" }, "devDependencies": { - "@eslint/js": "^9.26.0", + "@eslint/js": "^9.34.0", "@types/sanitize-html": "^2.16.0", - "eslint": "^9.26.0", - "globals": "^16.1.0", + "eslint": "^9.34.0", + "globals": "^16.3.0", "https-proxy-agent": "^7.0.6", - "nock": "^14.0.4" + "nock": "^14.0.10" }, "keywords": [ "article", From 26778c1f271bc6db49b5be78256ea9bedfadd854 Mon Sep 17 00:00:00 2001 From: Dong Nguyen Date: Thu, 4 Sep 2025 11:21:03 +0700 Subject: [PATCH 2/3] Remove examples - To stop dependencies outdated warning --- examples/browser-article-parser/.gitignore | 17 -- examples/browser-article-parser/README.md | 28 --- examples/browser-article-parser/package.json | 12 -- .../public/chota.min.css | 1 - .../browser-article-parser/public/index.html | 88 --------- examples/browser-article-parser/server.js | 30 --- examples/bun-article-parser/.gitignore | 171 ------------------ examples/bun-article-parser/README.md | 17 -- examples/bun-article-parser/index.ts | 42 ----- examples/bun-article-parser/package.json | 14 -- examples/bun-article-parser/tsconfig.json | 9 - examples/deno-article-parser/README.md | 11 -- examples/deno-article-parser/deno.json | 18 -- examples/deno-article-parser/index.ts | 43 ----- examples/node-article-parser/.gitignore | 17 -- examples/node-article-parser/README.md | 19 -- examples/node-article-parser/index.js | 38 ---- examples/node-article-parser/package.json | 13 -- examples/pupperteer/README.md | 19 -- examples/pupperteer/index.js | 64 ------- examples/pupperteer/package.json | 14 -- examples/tsnode-article-parser/.gitignore | 19 -- examples/tsnode-article-parser/README.md | 22 --- examples/tsnode-article-parser/index.ts | 38 ---- examples/tsnode-article-parser/package.json | 17 -- examples/tsnode-article-parser/tsconfig.json | 11 -- package.json | 2 +- 27 files changed, 1 insertion(+), 793 deletions(-) delete mode 100644 examples/browser-article-parser/.gitignore delete mode 100644 examples/browser-article-parser/README.md delete mode 100644 examples/browser-article-parser/package.json delete mode 100644 examples/browser-article-parser/public/chota.min.css delete mode 100644 examples/browser-article-parser/public/index.html delete mode 100644 examples/browser-article-parser/server.js delete mode 100644 examples/bun-article-parser/.gitignore delete mode 100644 examples/bun-article-parser/README.md delete mode 100644 examples/bun-article-parser/index.ts delete mode 100644 examples/bun-article-parser/package.json delete mode 100644 examples/bun-article-parser/tsconfig.json delete mode 100644 examples/deno-article-parser/README.md delete mode 100644 examples/deno-article-parser/deno.json delete mode 100644 examples/deno-article-parser/index.ts delete mode 100644 examples/node-article-parser/.gitignore delete mode 100644 examples/node-article-parser/README.md delete mode 100644 examples/node-article-parser/index.js delete mode 100644 examples/node-article-parser/package.json delete mode 100644 examples/pupperteer/README.md delete mode 100644 examples/pupperteer/index.js delete mode 100644 examples/pupperteer/package.json delete mode 100644 examples/tsnode-article-parser/.gitignore delete mode 100644 examples/tsnode-article-parser/README.md delete mode 100644 examples/tsnode-article-parser/index.ts delete mode 100644 examples/tsnode-article-parser/package.json delete mode 100644 examples/tsnode-article-parser/tsconfig.json diff --git a/examples/browser-article-parser/.gitignore b/examples/browser-article-parser/.gitignore deleted file mode 100644 index 48012a9..0000000 --- a/examples/browser-article-parser/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -# Logs -logs -*.log -*.debug - -# Runtime data -*.pid -*.seed - -node_modules -coverage -.nyc_output - -.DS_Store -yarn.lock -coverage.lcov -pnpm-lock.yaml diff --git a/examples/browser-article-parser/README.md b/examples/browser-article-parser/README.md deleted file mode 100644 index 23b26df..0000000 --- a/examples/browser-article-parser/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# browser-article-parser - -This demo shows how to use `@extractus/article-extractor` at client side, with or without proxy. - -To install: - -```bash -npm i - -# or pnpm, yarn -``` - -Start server: - -```bash -npm start -``` - -Open `http://localhost:3100/` to test. - -Basically `@extractus/article-extractor` only works at server side. - -However there are some noble publishers those enable `Access-Control-Allow-Origin` on their service. -For example with articles from [bitcoin.com](https://news.bitcoin.com/the-future-of-nft-is-evt-the-new-game-changer-token/), [CNBC](https://www.cnbc.com/2022/09/21/what-another-major-rate-hike-by-the-federal-reserve-means-to-you.html) or [Decrypt](https://decrypt.co/110356/cardano-blockchain-moves-forward-with-vasil-upgrade) we can extract from browser. - -With the remaining cases, we need a proxy layer to bypass CORS policy. - ---- diff --git a/examples/browser-article-parser/package.json b/examples/browser-article-parser/package.json deleted file mode 100644 index fd76eb4..0000000 --- a/examples/browser-article-parser/package.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "name": "browser-article-parser", - "version": "1.0.0", - "type": "module", - "scripts": { - "start": "node server" - }, - "dependencies": { - "express": "latest", - "got": "latest" - } -} diff --git a/examples/browser-article-parser/public/chota.min.css b/examples/browser-article-parser/public/chota.min.css deleted file mode 100644 index b445236..0000000 --- a/examples/browser-article-parser/public/chota.min.css +++ /dev/null @@ -1 +0,0 @@ -/*! chota.css v0.7.2 | MIT License | github.com/jenil/chota */:root{--bg-color:#fff;--bg-secondary-color:#f3f3f6;--color-primary:#14854f;--color-lightGrey:#d2d6dd;--color-grey:#747681;--color-darkGrey:#3f4144;--color-error:#d43939;--color-success:#28bd14;--grid-maxWidth:120rem;--grid-gutter:2rem;--font-size:1.6rem;--font-color:#333;--font-family-sans:-apple-system,BlinkMacSystemFont,Avenir,"Avenir Next","Segoe UI","Roboto","Oxygen","Ubuntu","Cantarell","Fira Sans","Droid Sans","Helvetica Neue",sans-serif;--font-family-mono:monaco,"Consolas","Lucida Console",monospace}html{-webkit-box-sizing:border-box;box-sizing:border-box;font-size:62.5%;line-height:1.15;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}*,:after,:before{-webkit-box-sizing:inherit;box-sizing:inherit}body{background-color:var(--bg-color);line-height:1.6;font-size:var(--font-size);color:var(--font-color);font-family:Segoe UI,Helvetica Neue,sans-serif;font-family:var(--font-family-sans);margin:0;padding:0}h1,h2,h3,h4,h5,h6{font-weight:500;margin:.35em 0 .7em}h1{font-size:2em}h2{font-size:1.75em}h3{font-size:1.5em}h4{font-size:1.25em}h5{font-size:1em}h6{font-size:.85em}a{color:var(--color-primary);text-decoration:none}a:hover:not(.button){opacity:.75}button{font-family:inherit}p{margin-top:0}blockquote{background-color:var(--bg-secondary-color);padding:1.5rem 2rem;border-left:3px solid var(--color-lightGrey)}dl dt{font-weight:700}hr{background-color:var(--color-lightGrey);height:1px;margin:1rem 0}hr,table{border:none}table{width:100%;border-collapse:collapse;border-spacing:0;text-align:left}table.striped tr:nth-of-type(2n){background-color:var(--bg-secondary-color)}td,th{vertical-align:middle;padding:1.2rem .4rem}thead{border-bottom:2px solid var(--color-lightGrey)}tfoot{border-top:2px solid var(--color-lightGrey)}code,kbd,pre,samp,tt{font-family:var(--font-family-mono)}code,kbd{font-size:90%;white-space:pre-wrap;border-radius:4px;padding:.2em .4em;color:var(--color-error)}code,kbd,pre{background-color:var(--bg-secondary-color)}pre{font-size:1em;padding:1rem;overflow-x:auto}pre code{background:none;padding:0}abbr[title]{border-bottom:none;text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}img{max-width:100%}fieldset{border:1px solid var(--color-lightGrey)}iframe{border:0}.container{max-width:var(--grid-maxWidth);margin:0 auto;width:96%;padding:0 calc(var(--grid-gutter)/2)}.row{display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-box-direction:normal;-ms-flex-flow:row wrap;flex-flow:row wrap;-webkit-box-pack:start;-ms-flex-pack:start;justify-content:flex-start;margin-left:calc(var(--grid-gutter)/-2);margin-right:calc(var(--grid-gutter)/-2)}.row,.row.reverse{-webkit-box-orient:horizontal}.row.reverse{-webkit-box-direction:reverse;-ms-flex-direction:row-reverse;flex-direction:row-reverse}.col{-webkit-box-flex:1;-ms-flex:1;flex:1}.col,[class*=" col-"],[class^=col-]{margin:0 calc(var(--grid-gutter)/2) calc(var(--grid-gutter)/2)}.col-1{-ms-flex:0 0 calc(8.33333% - var(--grid-gutter));flex:0 0 calc(8.33333% - var(--grid-gutter));max-width:calc(8.33333% - var(--grid-gutter))}.col-1,.col-2{-webkit-box-flex:0}.col-2{-ms-flex:0 0 calc(16.66667% - var(--grid-gutter));flex:0 0 calc(16.66667% - var(--grid-gutter));max-width:calc(16.66667% - var(--grid-gutter))}.col-3{-ms-flex:0 0 calc(25% - var(--grid-gutter));flex:0 0 calc(25% - var(--grid-gutter));max-width:calc(25% - var(--grid-gutter))}.col-3,.col-4{-webkit-box-flex:0}.col-4{-ms-flex:0 0 calc(33.33333% - var(--grid-gutter));flex:0 0 calc(33.33333% - var(--grid-gutter));max-width:calc(33.33333% - var(--grid-gutter))}.col-5{-ms-flex:0 0 calc(41.66667% - var(--grid-gutter));flex:0 0 calc(41.66667% - var(--grid-gutter));max-width:calc(41.66667% - var(--grid-gutter))}.col-5,.col-6{-webkit-box-flex:0}.col-6{-ms-flex:0 0 calc(50% - var(--grid-gutter));flex:0 0 calc(50% - var(--grid-gutter));max-width:calc(50% - var(--grid-gutter))}.col-7{-ms-flex:0 0 calc(58.33333% - var(--grid-gutter));flex:0 0 calc(58.33333% - var(--grid-gutter));max-width:calc(58.33333% - var(--grid-gutter))}.col-7,.col-8{-webkit-box-flex:0}.col-8{-ms-flex:0 0 calc(66.66667% - var(--grid-gutter));flex:0 0 calc(66.66667% - var(--grid-gutter));max-width:calc(66.66667% - var(--grid-gutter))}.col-9{-ms-flex:0 0 calc(75% - var(--grid-gutter));flex:0 0 calc(75% - var(--grid-gutter));max-width:calc(75% - var(--grid-gutter))}.col-9,.col-10{-webkit-box-flex:0}.col-10{-ms-flex:0 0 calc(83.33333% - var(--grid-gutter));flex:0 0 calc(83.33333% - var(--grid-gutter));max-width:calc(83.33333% - var(--grid-gutter))}.col-11{-ms-flex:0 0 calc(91.66667% - var(--grid-gutter));flex:0 0 calc(91.66667% - var(--grid-gutter));max-width:calc(91.66667% - var(--grid-gutter))}.col-11,.col-12{-webkit-box-flex:0}.col-12{-ms-flex:0 0 calc(100% - var(--grid-gutter));flex:0 0 calc(100% - var(--grid-gutter));max-width:calc(100% - var(--grid-gutter))}@media screen and (max-width:599px){.container{width:100%}.col,[class*=col-],[class^=col-]{-webkit-box-flex:0;-ms-flex:0 1 100%;flex:0 1 100%;max-width:100%}}@media screen and (min-width:900px){.col-1-md{-webkit-box-flex:0;-ms-flex:0 0 calc(8.33333% - var(--grid-gutter));flex:0 0 calc(8.33333% - var(--grid-gutter));max-width:calc(8.33333% - var(--grid-gutter))}.col-2-md{-webkit-box-flex:0;-ms-flex:0 0 calc(16.66667% - var(--grid-gutter));flex:0 0 calc(16.66667% - var(--grid-gutter));max-width:calc(16.66667% - var(--grid-gutter))}.col-3-md{-webkit-box-flex:0;-ms-flex:0 0 calc(25% - var(--grid-gutter));flex:0 0 calc(25% - var(--grid-gutter));max-width:calc(25% - var(--grid-gutter))}.col-4-md{-webkit-box-flex:0;-ms-flex:0 0 calc(33.33333% - var(--grid-gutter));flex:0 0 calc(33.33333% - var(--grid-gutter));max-width:calc(33.33333% - var(--grid-gutter))}.col-5-md{-webkit-box-flex:0;-ms-flex:0 0 calc(41.66667% - var(--grid-gutter));flex:0 0 calc(41.66667% - var(--grid-gutter));max-width:calc(41.66667% - var(--grid-gutter))}.col-6-md{-webkit-box-flex:0;-ms-flex:0 0 calc(50% - var(--grid-gutter));flex:0 0 calc(50% - var(--grid-gutter));max-width:calc(50% - var(--grid-gutter))}.col-7-md{-webkit-box-flex:0;-ms-flex:0 0 calc(58.33333% - var(--grid-gutter));flex:0 0 calc(58.33333% - var(--grid-gutter));max-width:calc(58.33333% - var(--grid-gutter))}.col-8-md{-webkit-box-flex:0;-ms-flex:0 0 calc(66.66667% - var(--grid-gutter));flex:0 0 calc(66.66667% - var(--grid-gutter));max-width:calc(66.66667% - var(--grid-gutter))}.col-9-md{-webkit-box-flex:0;-ms-flex:0 0 calc(75% - var(--grid-gutter));flex:0 0 calc(75% - var(--grid-gutter));max-width:calc(75% - var(--grid-gutter))}.col-10-md{-webkit-box-flex:0;-ms-flex:0 0 calc(83.33333% - var(--grid-gutter));flex:0 0 calc(83.33333% - var(--grid-gutter));max-width:calc(83.33333% - var(--grid-gutter))}.col-11-md{-webkit-box-flex:0;-ms-flex:0 0 calc(91.66667% - var(--grid-gutter));flex:0 0 calc(91.66667% - var(--grid-gutter));max-width:calc(91.66667% - var(--grid-gutter))}.col-12-md{-webkit-box-flex:0;-ms-flex:0 0 calc(100% - var(--grid-gutter));flex:0 0 calc(100% - var(--grid-gutter));max-width:calc(100% - var(--grid-gutter))}}@media screen and (min-width:1200px){.col-1-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(8.33333% - var(--grid-gutter));flex:0 0 calc(8.33333% - var(--grid-gutter));max-width:calc(8.33333% - var(--grid-gutter))}.col-2-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(16.66667% - var(--grid-gutter));flex:0 0 calc(16.66667% - var(--grid-gutter));max-width:calc(16.66667% - var(--grid-gutter))}.col-3-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(25% - var(--grid-gutter));flex:0 0 calc(25% - var(--grid-gutter));max-width:calc(25% - var(--grid-gutter))}.col-4-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(33.33333% - var(--grid-gutter));flex:0 0 calc(33.33333% - var(--grid-gutter));max-width:calc(33.33333% - var(--grid-gutter))}.col-5-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(41.66667% - var(--grid-gutter));flex:0 0 calc(41.66667% - var(--grid-gutter));max-width:calc(41.66667% - var(--grid-gutter))}.col-6-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(50% - var(--grid-gutter));flex:0 0 calc(50% - var(--grid-gutter));max-width:calc(50% - var(--grid-gutter))}.col-7-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(58.33333% - var(--grid-gutter));flex:0 0 calc(58.33333% - var(--grid-gutter));max-width:calc(58.33333% - var(--grid-gutter))}.col-8-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(66.66667% - var(--grid-gutter));flex:0 0 calc(66.66667% - var(--grid-gutter));max-width:calc(66.66667% - var(--grid-gutter))}.col-9-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(75% - var(--grid-gutter));flex:0 0 calc(75% - var(--grid-gutter));max-width:calc(75% - var(--grid-gutter))}.col-10-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(83.33333% - var(--grid-gutter));flex:0 0 calc(83.33333% - var(--grid-gutter));max-width:calc(83.33333% - var(--grid-gutter))}.col-11-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(91.66667% - var(--grid-gutter));flex:0 0 calc(91.66667% - var(--grid-gutter));max-width:calc(91.66667% - var(--grid-gutter))}.col-12-lg{-webkit-box-flex:0;-ms-flex:0 0 calc(100% - var(--grid-gutter));flex:0 0 calc(100% - var(--grid-gutter));max-width:calc(100% - var(--grid-gutter))}}fieldset{padding:.5rem 2rem}legend{text-transform:uppercase;font-size:.8em;letter-spacing:.1rem}input:not([type=checkbox]):not([type=radio]):not([type=submit]):not([type=color]):not([type=button]):not([type=reset]),select,textarea,textarea[type=text]{font-family:inherit;padding:.8rem 1rem;border-radius:4px;border:1px solid var(--color-lightGrey);font-size:1em;-webkit-transition:all .2s ease;transition:all .2s ease;display:block;width:100%}input:not([type=checkbox]):not([type=radio]):not([type=submit]):not([type=color]):not([type=button]):not([type=reset]):not(:disabled):hover,select:hover,textarea:hover,textarea[type=text]:hover{border-color:var(--color-grey)}input:not([type=checkbox]):not([type=radio]):not([type=submit]):not([type=color]):not([type=button]):not([type=reset]):focus,select:focus,textarea:focus,textarea[type=text]:focus{outline:none;border-color:var(--color-primary);-webkit-box-shadow:0 0 1px var(--color-primary);box-shadow:0 0 1px var(--color-primary)}input.error:not([type=checkbox]):not([type=radio]):not([type=submit]):not([type=color]):not([type=button]):not([type=reset]),textarea.error{border-color:var(--color-error)}input.success:not([type=checkbox]):not([type=radio]):not([type=submit]):not([type=color]):not([type=button]):not([type=reset]),textarea.success{border-color:var(--color-success)}select{-webkit-appearance:none;background:#f3f3f6 no-repeat 100%;background-size:1ex;background-origin:content-box;background-image:url("data:image/svg+xml;utf8,")}[type=checkbox],[type=radio]{width:1.6rem;height:1.6rem}.button,[type=button],[type=reset],[type=submit],button{padding:1rem 2.5rem;color:var(--color-darkGrey);background:var(--color-lightGrey);border-radius:4px;border:1px solid transparent;font-size:var(--font-size);line-height:1;text-align:center;-webkit-transition:opacity .2s ease;transition:opacity .2s ease;text-decoration:none;-webkit-transform:scale(1);transform:scale(1);display:inline-block;cursor:pointer}.grouped{display:-webkit-box;display:-ms-flexbox;display:flex}.grouped>:not(:last-child){margin-right:16px}.grouped.gapless>*{margin:0 0 0 -1px!important;border-radius:0!important}.grouped.gapless>:first-child{margin:0!important;border-radius:4px 0 0 4px!important}.grouped.gapless>:last-child{border-radius:0 4px 4px 0!important}.button+.button{margin-left:1rem}.button:hover,[type=button]:hover,[type=reset]:hover,[type=submit]:hover,button:hover{opacity:.8}.button:active,[type=button]:active,[type=reset]:active,[type=submit]:active,button:active{-webkit-transform:scale(.98);transform:scale(.98)}button:disabled,button:disabled:hover,input:disabled,input:disabled:hover{opacity:.4;cursor:not-allowed}.button.dark,.button.error,.button.primary,.button.secondary,.button.success,[type=submit]{color:#fff;z-index:1;background-color:#000;background-color:var(--color-primary)}.button.secondary{background-color:var(--color-grey)}.button.dark{background-color:var(--color-darkGrey)}.button.error{background-color:var(--color-error)}.button.success{background-color:var(--color-success)}.button.outline{background-color:transparent;border-color:var(--color-lightGrey)}.button.outline.primary{border-color:var(--color-primary);color:var(--color-primary)}.button.outline.secondary{border-color:var(--color-grey);color:var(--color-grey)}.button.outline.dark{border-color:var(--color-darkGrey);color:var(--color-darkGrey)}.button.clear{background-color:transparent;border-color:transparent;color:var(--color-primary)}.button.icon{display:-webkit-inline-box;display:-ms-inline-flexbox;display:inline-flex;-webkit-box-align:center;-ms-flex-align:center;align-items:center}.button.icon>img{margin-left:2px}.button.icon-only{padding:1rem}::-webkit-input-placeholder{color:#bdbfc4}::-moz-placeholder{color:#bdbfc4}:-ms-input-placeholder{color:#bdbfc4}::-ms-input-placeholder{color:#bdbfc4}::placeholder{color:#bdbfc4}.nav{display:-webkit-box;display:-ms-flexbox;display:flex;min-height:5rem;-webkit-box-align:stretch;-ms-flex-align:stretch;align-items:stretch}.nav img{max-height:3rem}.nav-center,.nav-left,.nav-right,.nav>.container{display:-webkit-box;display:-ms-flexbox;display:flex}.nav-center,.nav-left,.nav-right{-webkit-box-flex:1;-ms-flex:1;flex:1}.nav-left{-webkit-box-pack:start;-ms-flex-pack:start;justify-content:flex-start}.nav-right{-webkit-box-pack:end;-ms-flex-pack:end;justify-content:flex-end}.nav-center{-webkit-box-pack:center;-ms-flex-pack:center;justify-content:center}@media screen and (max-width:480px){.nav,.nav>.container{-webkit-box-orient:vertical;-webkit-box-direction:normal;-ms-flex-direction:column;flex-direction:column}.nav-center,.nav-left,.nav-right{-ms-flex-wrap:wrap;flex-wrap:wrap;-webkit-box-pack:center;-ms-flex-pack:center;justify-content:center}}.nav .brand,.nav a{text-decoration:none;display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-box-align:center;-ms-flex-align:center;align-items:center;padding:1rem 2rem;color:var(--color-darkGrey)}.nav .active:not(.button){color:#000;color:var(--color-primary)}.nav .brand{font-size:1.75em;padding-top:0;padding-bottom:0}.nav .brand img{padding-right:1rem}.nav .button{margin:auto 1rem}.card{padding:1rem 2rem;border-radius:4px;background:var(--bg-color);-webkit-box-shadow:0 1px 3px var(--color-grey);box-shadow:0 1px 3px var(--color-grey)}.card p:last-child{margin:0}.card header>*{margin-top:0;margin-bottom:1rem}.tabs{display:-webkit-box;display:-ms-flexbox;display:flex}.tabs a{text-decoration:none}.tabs>.dropdown>summary,.tabs>a{padding:1rem 2rem;-webkit-box-flex:0;-ms-flex:0 1 auto;flex:0 1 auto;color:var(--color-darkGrey);border-bottom:2px solid var(--color-lightGrey);text-align:center}.tabs>a.active,.tabs>a:hover{opacity:1;border-bottom:2px solid var(--color-darkGrey)}.tabs>a.active{border-color:var(--color-primary)}.tabs.is-full a{-webkit-box-flex:1;-ms-flex:1 1 auto;flex:1 1 auto}.tag{display:inline-block;border:1px solid var(--color-lightGrey);text-transform:uppercase;color:var(--color-grey);padding:.5rem;line-height:1;letter-spacing:.5px}.tag.is-small{padding:.4rem;font-size:.75em}.tag.is-large{padding:.7rem;font-size:1.125em}.tag+.tag{margin-left:1rem}details.dropdown{position:relative;display:inline-block}details.dropdown>:last-child{position:absolute;left:0;white-space:nowrap}.bg-primary{background-color:var(--color-primary)!important}.bg-light{background-color:var(--color-lightGrey)!important}.bg-dark{background-color:var(--color-darkGrey)!important}.bg-grey{background-color:var(--color-grey)!important}.bg-error{background-color:var(--color-error)!important}.bg-success{background-color:var(--color-success)!important}.bd-primary{border:1px solid var(--color-primary)!important}.bd-light{border:1px solid var(--color-lightGrey)!important}.bd-dark{border:1px solid var(--color-darkGrey)!important}.bd-grey{border:1px solid var(--color-grey)!important}.bd-error{border:1px solid var(--color-error)!important}.bd-success{border:1px solid var(--color-success)!important}.text-primary{color:var(--color-primary)!important}.text-light{color:var(--color-lightGrey)!important}.text-dark{color:var(--color-darkGrey)!important}.text-grey{color:var(--color-grey)!important}.text-error{color:var(--color-error)!important}.text-success{color:var(--color-success)!important}.text-white{color:#fff!important}.pull-right{float:right!important}.pull-left{float:left!important}.text-center{text-align:center}.text-left{text-align:left}.text-right{text-align:right}.text-justify{text-align:justify}.text-uppercase{text-transform:uppercase}.text-lowercase{text-transform:lowercase}.text-capitalize{text-transform:capitalize}.is-full-screen{width:100%;min-height:100vh}.is-full-width{width:100%!important}.is-vertical-align{display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-box-align:center;-ms-flex-align:center;align-items:center}.is-center,.is-horizontal-align{display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-box-pack:center;-ms-flex-pack:center;justify-content:center}.is-center{-webkit-box-align:center;-ms-flex-align:center;align-items:center}.is-right{-webkit-box-pack:end;-ms-flex-pack:end;justify-content:flex-end}.is-left,.is-right{display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-box-align:center;-ms-flex-align:center;align-items:center}.is-left{-webkit-box-pack:start;-ms-flex-pack:start;justify-content:flex-start}.is-fixed{position:fixed;width:100%}.is-paddingless{padding:0!important}.is-marginless{margin:0!important}.is-pointer{cursor:pointer!important}.is-rounded{border-radius:100%}.clearfix{content:"";display:table;clear:both}.is-hidden{display:none!important}@media screen and (max-width:599px){.hide-xs{display:none!important}}@media screen and (min-width:600px) and (max-width:899px){.hide-sm{display:none!important}}@media screen and (min-width:900px) and (max-width:1199px){.hide-md{display:none!important}}@media screen and (min-width:1200px){.hide-lg{display:none!important}}@media print{.hide-pr{display:none!important}} \ No newline at end of file diff --git a/examples/browser-article-parser/public/index.html b/examples/browser-article-parser/public/index.html deleted file mode 100644 index 1a5fbf5..0000000 --- a/examples/browser-article-parser/public/index.html +++ /dev/null @@ -1,88 +0,0 @@ - - - - Example article-parser - - - - -
-
-
-

article-parser on browser

-
-
-
- enter link to blog post or news article -

- -

-

- - -

-
-
-
- Result -

- -

-
-
-
- - - diff --git a/examples/browser-article-parser/server.js b/examples/browser-article-parser/server.js deleted file mode 100644 index 271c204..0000000 --- a/examples/browser-article-parser/server.js +++ /dev/null @@ -1,30 +0,0 @@ -// server - -import got from 'got' -import express from 'express' - -const app = express() - -const loadRemotePage = async (url) => { - try { - const headers = { - 'Accept-Charset': 'utf-8', - } - const data = await got(url, { headers }).text() - return data - } catch (err) { - return err.message - } -} - -app.get('/proxy/gethtml', async (req, res) => { - const url = req.query.url - const xml = await loadRemotePage(url) - return res.send(xml) -}) - -app.use(express.static('public')) - -app.listen(3100, () => { - console.log('Server is running at http://localhost:3100') -}) diff --git a/examples/bun-article-parser/.gitignore b/examples/bun-article-parser/.gitignore deleted file mode 100644 index f0e3e7e..0000000 --- a/examples/bun-article-parser/.gitignore +++ /dev/null @@ -1,171 +0,0 @@ -# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore - -# Logs - -logs -_.log -npm-debug.log_ -yarn-debug.log* -yarn-error.log* -lerna-debug.log* -.pnpm-debug.log* - -# Diagnostic reports (https://nodejs.org/api/report.html) - -report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json - -# Runtime data - -pids -_.pid -_.seed -\*.pid.lock - -# Directory for instrumented libs generated by jscoverage/JSCover - -lib-cov - -# Coverage directory used by tools like istanbul - -coverage -\*.lcov - -# nyc test coverage - -.nyc_output - -# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) - -.grunt - -# Bower dependency directory (https://bower.io/) - -bower_components - -# node-waf configuration - -.lock-wscript - -# Compiled binary addons (https://nodejs.org/api/addons.html) - -build/Release - -# Dependency directories - -node_modules/ -jspm_packages/ - -# Snowpack dependency directory (https://snowpack.dev/) - -web_modules/ - -# TypeScript cache - -\*.tsbuildinfo - -# Optional npm cache directory - -.npm - -# Optional eslint cache - -.eslintcache - -# Optional stylelint cache - -.stylelintcache - -# Microbundle cache - -.rpt2_cache/ -.rts2_cache_cjs/ -.rts2_cache_es/ -.rts2_cache_umd/ - -# Optional REPL history - -.node_repl_history - -# Output of 'npm pack' - -\*.tgz - -# Yarn Integrity file - -.yarn-integrity - -# dotenv environment variable files - -.env -.env.development.local -.env.test.local -.env.production.local -.env.local - -# parcel-bundler cache (https://parceljs.org/) - -.cache -.parcel-cache - -# Next.js build output - -.next -out - -# Nuxt.js build / generate output - -.nuxt -dist - -# Gatsby files - -.cache/ - -# Comment in the public line in if your project uses Gatsby and not Next.js - -# https://nextjs.org/blog/next-9-1#public-directory-support - -# public - -# vuepress build output - -.vuepress/dist - -# vuepress v2.x temp and cache directory - -.temp -.cache - -# Docusaurus cache and generated files - -.docusaurus - -# Serverless directories - -.serverless/ - -# FuseBox cache - -.fusebox/ - -# DynamoDB Local files - -.dynamodb/ - -# TernJS port file - -.tern-port - -# Stores VSCode versions used for testing VSCode extensions - -.vscode-test - -# yarn v2 - -.yarn/cache -.yarn/unplugged -.yarn/build-state.yml -.yarn/install-state.gz -.pnp.\* - -bun.lockb diff --git a/examples/bun-article-parser/README.md b/examples/bun-article-parser/README.md deleted file mode 100644 index 21d6eba..0000000 --- a/examples/bun-article-parser/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# bun-article-parser - -To install dependencies: - -```bash -bun install -``` - -Start server: - -```bash -bun start -``` - -Open `http://localhost:3100/?url=https://dev.to/ndaidong/how-to-make-your-mongodb-container-more-secure-1646` to see the result. - ---- diff --git a/examples/bun-article-parser/index.ts b/examples/bun-article-parser/index.ts deleted file mode 100644 index dcc42fe..0000000 --- a/examples/bun-article-parser/index.ts +++ /dev/null @@ -1,42 +0,0 @@ -import { Hono } from 'hono' - -import { extract } from '@extractus/article-extractor' - -const app = new Hono() - -const meta = { - service: 'article-parser', - lang: 'typescript', - server: 'hono', - platform: 'bun' -} - -app.get('/', async (c) => { - const url = c.req.query('url') - if (!url) { - return c.json(meta) - } - try { - const data = await extract(url) - return c.json({ - error: 0, - message: 'article has been extracted successfully', - data, - meta - }) - } catch (err) { - return c.json({ - error: 1, - message: err.message, - data: null, - meta - }) - } -}) - -export default { - port: 3100, - fetch: app.fetch, -} - -console.log('Server is running at http://localhost:3100') diff --git a/examples/bun-article-parser/package.json b/examples/bun-article-parser/package.json deleted file mode 100644 index 74b6ec3..0000000 --- a/examples/bun-article-parser/package.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "name": "bun-article-parser", - "module": "index.ts", - "scripts": { - "start": "bun run index.ts" - }, - "devDependencies": { - "bun-types": "latest" - }, - "dependencies": { - "@extractus/article-extractor": "latest", - "hono": "latest" - } -} diff --git a/examples/bun-article-parser/tsconfig.json b/examples/bun-article-parser/tsconfig.json deleted file mode 100644 index ebdb7d2..0000000 --- a/examples/bun-article-parser/tsconfig.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "compilerOptions": { - "lib": ["ESNext"], - "module": "esnext", - "target": "esnext", - // "bun-types" is the important part - "types": ["bun-types"] - } -} diff --git a/examples/deno-article-parser/README.md b/examples/deno-article-parser/README.md deleted file mode 100644 index 44528f6..0000000 --- a/examples/deno-article-parser/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# deno-article-parser - -With `deno`, we have not much thing to do. Just start the server: - -```bash -deno task dev -``` - -Open `http://localhost:3100/?url=https://dev.to/ndaidong/how-to-make-your-mongodb-container-more-secure-1646` to see the result. - ---- diff --git a/examples/deno-article-parser/deno.json b/examples/deno-article-parser/deno.json deleted file mode 100644 index 2243c40..0000000 --- a/examples/deno-article-parser/deno.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "deno-article-parser", - "version": "1.0.0", - "imports": { - "serve": "https://deno.land/std/http/server.ts", - "hono": "https://deno.land/x/hono/mod.ts", - "article-extractor": "https://esm.sh/@extractus/article-extractor" - }, - "tasks": { - "dev": "deno run --allow-net --allow-env --allow-read index.ts", - }, - "compilerOptions": { - "allowJs": true, - "noImplicitAny": false, - "lib": ["deno.window"], - "strict": true - } -} diff --git a/examples/deno-article-parser/index.ts b/examples/deno-article-parser/index.ts deleted file mode 100644 index 917a624..0000000 --- a/examples/deno-article-parser/index.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { serve } from 'serve' -import { Hono } from 'hono' - -import { extract } from 'article-extractor' - -const app = new Hono() - -const meta = { - service: 'article-parser', - lang: 'typescript', - server: 'hono', - platform: 'deno' -} - -app.get('/', async (c) => { - const url = c.req.query('url') - if (!url) { - return c.json(meta) - } - try { - const data = await extract(url) - return c.json({ - error: 0, - message: 'article has been extracted successfully', - data, - meta - }) - } catch (err) { - return c.json({ - error: 1, - message: err.message, - data: null, - meta - }) - } -}) - -serve(app.fetch, { - port: 3100, - onListen: () => { - console.log('Server is running at http://localhost:3100') - } -}) diff --git a/examples/node-article-parser/.gitignore b/examples/node-article-parser/.gitignore deleted file mode 100644 index 48012a9..0000000 --- a/examples/node-article-parser/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -# Logs -logs -*.log -*.debug - -# Runtime data -*.pid -*.seed - -node_modules -coverage -.nyc_output - -.DS_Store -yarn.lock -coverage.lcov -pnpm-lock.yaml diff --git a/examples/node-article-parser/README.md b/examples/node-article-parser/README.md deleted file mode 100644 index 7ffc0da..0000000 --- a/examples/node-article-parser/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# node-article-parser - -Install dependencies: - -```bash -npm i - -# or pnpm, yarn -``` - -Start server: - -```bash -npm start -``` - -Open `http://localhost:3100/?url=https://dev.to/ndaidong/how-to-make-your-mongodb-container-more-secure-1646` to see the result. - ---- diff --git a/examples/node-article-parser/index.js b/examples/node-article-parser/index.js deleted file mode 100644 index 820c9de..0000000 --- a/examples/node-article-parser/index.js +++ /dev/null @@ -1,38 +0,0 @@ -import express from 'express' -import { extract } from '@extractus/article-extractor' - -const app = express() - -const meta = { - service: 'article-parser', - lang: 'javascript', - server: 'express', - platform: 'node', -} - -app.get('/', async (req, res) => { - const url = req.query.url - if (!url) { - return res.json(meta) - } - try { - const data = await extract(url) - return res.json({ - error: 0, - message: 'article has been extracted successfully', - data, - meta, - }) - } catch (err) { - return res.json({ - error: 1, - message: err.message, - data: null, - meta, - }) - } -}) - -app.listen(3100, () => { - console.log('Server is running at http://localhost:3100') -}) diff --git a/examples/node-article-parser/package.json b/examples/node-article-parser/package.json deleted file mode 100644 index ff22efb..0000000 --- a/examples/node-article-parser/package.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "name": "node-article-parser", - "version": "1.0.0", - "main": "index.js", - "type": "module", - "scripts": { - "start": "node index.js" - }, - "dependencies": { - "@extractus/article-extractor": "latest", - "express": "latest" - } -} diff --git a/examples/pupperteer/README.md b/examples/pupperteer/README.md deleted file mode 100644 index 86aad2e..0000000 --- a/examples/pupperteer/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# node-article-parser with Pupperteer - -Install dependencies: - -```bash -npm i - -# or pnpm, yarn -``` - -Start server: - -```bash -npm start -``` - -Open `http://localhost:3100/?url=https://client-side-rendering.pages.dev/lorem-ipsum` to see the result. - ---- diff --git a/examples/pupperteer/index.js b/examples/pupperteer/index.js deleted file mode 100644 index 5fa9ffd..0000000 --- a/examples/pupperteer/index.js +++ /dev/null @@ -1,64 +0,0 @@ -import puppeteer from 'puppeteer' -import express from 'express' -import { extractFromHtml } from '@extractus/article-extractor' - -const app = express() - -const meta = { - service: 'article-parser-pupperteer', - lang: 'javascript', - server: 'express', - platform: 'node', -} - -const loadHtml = async (url) => { - let browser = null - try { - console.log('Initialize puppeteer engine') - browser = await puppeteer.launch() - const page = await browser.newPage() - await page.setDefaultNavigationTimeout(6e4) - console.log(`Start rendering target page "${url}"`) - await page.goto(url, { - waitUntil: 'networkidle0', - }) - console.log(`Load html content from target page ${url}`) - const html = await page.content() - return html - } catch (err) { - console.error(err) - return null - } finally { - if (browser) { - await browser.close() - } - } -} - -app.get('/', async (req, res) => { - const url = req.query.url - if (!url) { - return res.json(meta) - } - try { - const html = await loadHtml(url) - const data = await extractFromHtml(html, url) - return res.json({ - error: 0, - message: 'article has been extracted successfully', - data, - meta, - }) - } catch (err) { - return res.json({ - error: 1, - message: err.message, - data: null, - meta, - }) - } -}) - -app.listen(3100, () => { - console.log('Server is running at http://localhost:3100') -}) diff --git a/examples/pupperteer/package.json b/examples/pupperteer/package.json deleted file mode 100644 index 23df845..0000000 --- a/examples/pupperteer/package.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "name": "node-pupperteer", - "version": "1.0.0", - "main": "index.js", - "type": "module", - "scripts": { - "start": "node index.js" - }, - "dependencies": { - "@extractus/article-extractor": "latest", - "express": "latest", - "puppeteer": "latest" - } -} diff --git a/examples/tsnode-article-parser/.gitignore b/examples/tsnode-article-parser/.gitignore deleted file mode 100644 index 5bad394..0000000 --- a/examples/tsnode-article-parser/.gitignore +++ /dev/null @@ -1,19 +0,0 @@ -# Logs -logs -*.log -*.debug - -# Runtime data -*.pid -*.seed - -node_modules -coverage -.nyc_output - -.DS_Store -yarn.lock -coverage.lcov -pnpm-lock.yaml - -dist diff --git a/examples/tsnode-article-parser/README.md b/examples/tsnode-article-parser/README.md deleted file mode 100644 index fa95de1..0000000 --- a/examples/tsnode-article-parser/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# tsnode-article-parser - -Install dependencies: - -```bash -npm i - -# or pnpm, yarn -``` - -Build typescript source to javascript and start server: - -```bash -npx tsc && node dist/index.js - -# or simple -npm start -``` - -Open `http://localhost:3100/?url=https://dev.to/ndaidong/how-to-make-your-mongodb-container-more-secure-1646` to see the result. - ---- diff --git a/examples/tsnode-article-parser/index.ts b/examples/tsnode-article-parser/index.ts deleted file mode 100644 index 2c5b92c..0000000 --- a/examples/tsnode-article-parser/index.ts +++ /dev/null @@ -1,38 +0,0 @@ -import express from 'express' -import { extract } from '@extractus/article-extractor' - -const app = express() - -const meta = { - service: 'article-parser', - lang: 'typescript', - server: 'express', - platform: 'node' -} - -app.get('/', async (req, res) => { - const url = req.query.url - if (!url) { - return res.json(meta) - } - try { - const data = await extract(url) - return res.json({ - error: 0, - message: 'article has been extracted successfully', - data, - meta - }) - } catch (err) { - return res.json({ - error: 1, - message: err.message, - data: null, - meta - }) - } -}) - -app.listen(3100, () => { - console.log('Server is running at http://localhost:3100') -}) diff --git a/examples/tsnode-article-parser/package.json b/examples/tsnode-article-parser/package.json deleted file mode 100644 index ca586db..0000000 --- a/examples/tsnode-article-parser/package.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "tsnode-article-parser", - "version": "1.0.0", - "main": "index.ts", - "type": "module", - "scripts": { - "prestart": "npx tsc", - "start": "node dist/index.js" - }, - "devDependencies": { - "typescript": "latest" - }, - "dependencies": { - "@extractus/article-extractor": "latest", - "express": "latest" - } -} diff --git a/examples/tsnode-article-parser/tsconfig.json b/examples/tsnode-article-parser/tsconfig.json deleted file mode 100644 index 5e247d6..0000000 --- a/examples/tsnode-article-parser/tsconfig.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "compilerOptions": { - "module": "es6", - "esModuleInterop": true, - "target": "esnext", - "moduleResolution": "node", - "sourceMap": true, - "outDir": "dist" - }, - "lib": ["es2015"] -} diff --git a/package.json b/package.json index 8cae001..a11a000 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "version": "8.0.19", + "version": "8.0.20", "name": "@extractus/article-extractor", "description": "To extract main article from given URL", "homepage": "https://github.com/extractus/article-extractor", From 69baf8d18c7f1b9a0852a1e698cd5b9d27b7cacd Mon Sep 17 00:00:00 2001 From: Dong Nguyen Date: Thu, 4 Sep 2025 11:36:15 +0700 Subject: [PATCH 3/3] v8.0.20 - Update packages --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index a11a000..8dc8f20 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ }, "types": "./index.d.ts", "engines": { - "node": ">= 18" + "node": ">= 20" }, "scripts": { "lint": "eslint .",