diff --git a/.gitignore b/.gitignore index 6ed95489e41..b28c5e95d40 100644 --- a/.gitignore +++ b/.gitignore @@ -232,6 +232,7 @@ config/* !config/nginx.conf !config/php-fpm.conf !config/php.ini +docker-compose.* ###################### ## VisualStudioCode ## diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f1080743c41..922d9453f71 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -144,6 +144,7 @@ * [Niehztog](https://github.com/Niehztog) * [NikNikYkt](https://github.com/NikNikYkt) * [Nono-m0le](https://github.com/Nono-m0le) +* [NotsoanoNimus](https://github.com/NotsoanoNimus) * [obsiwitch](https://github.com/obsiwitch) * [Ololbu](https://github.com/Ololbu) * [ORelio](https://github.com/ORelio) diff --git a/README.md b/README.md index 6124a4eac96..b3b12f0ea28 100644 --- a/README.md +++ b/README.md @@ -53,9 +53,10 @@ Requires minimum PHP 7.4. ### How to install on traditional shared web hosting -RSS-Bridge can basically be unzipped in a web folder. Should be working instantly. +RSS-Bridge can basically be unzipped into a web folder. Should be working instantly. 
-Latest zip as of Sep 2023: https://github.com/RSS-Bridge/rss-bridge/archive/refs/tags/2023-09-24.zip +Latest zip: +https://github.com/RSS-Bridge/rss-bridge/archive/refs/heads/master.zip (2MB) ### How to install on Debian 12 (nginx + php-fpm) @@ -66,7 +67,7 @@ timedatectl set-timezone Europe/Oslo apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl -# Create a new user account +# Create a user account useradd --shell /bin/bash --create-home rss-bridge cd /var/www @@ -101,7 +102,10 @@ Nginx config: server { listen 80; + + # TODO: change to your own server name server_name example.com; + access_log /var/log/nginx/rss-bridge.access.log; error_log /var/log/nginx/rss-bridge.error.log; log_not_found off; @@ -150,8 +154,11 @@ listen = /run/php/rss-bridge.sock listen.owner = www-data listen.group = www-data +# Create 10 workers standing by to serve requests pm = static pm.max_children = 10 + +# Respawn worker after 500 requests (workaround for memory leaks etc.) pm.max_requests = 500 ``` @@ -179,7 +186,7 @@ Install the latest release. ```shell cd /var/www -composer create-project -v --no-dev rss-bridge/rss-bridge +composer create-project -v --no-dev --no-scripts rss-bridge/rss-bridge ``` ### How to install with Caddy @@ -192,8 +199,16 @@ Install by downloading the docker image from Docker Hub: ```bash # Create container -docker create --name=rss-bridge --publish 3000:80 rssbridge/rss-bridge +docker create --name=rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rssbridge/rss-bridge +``` + +You can put custom `config.ini.php` and bridges into `./config`. + +**You must restart the container for custom changes to take effect.** +See `docker-entrypoint.sh` for details. + +```bash # Start container docker start rss-bridge ``` @@ -207,30 +222,29 @@ Browse http://localhost:3000/ docker build -t rss-bridge . 
# Create container -docker create --name rss-bridge --publish 3000:80 rss-bridge +docker create --name rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rss-bridge +``` + +You can put custom `config.ini.php` and bridges into `./config`. + +**You must restart the container for custom changes to take effect.** +See `docker-entrypoint.sh` for details. + +```bash # Start container docker start rss-bridge ``` Browse http://localhost:3000/ -### Install with docker-compose - -Create a `docker-compose.yml` file locally with with the following content: -```yml -version: '2' -services: - rss-bridge: - image: rssbridge/rss-bridge:latest - volumes: - - :/config - ports: - - 3000:80 - restart: unless-stopped -``` +### Install with docker-compose (using Docker Hub) + +You can put custom `config.ini.php` and bridges into `./config`. + +**You must restart the container for custom changes to take effect.** -Then launch with `docker-compose`: +See `docker-entrypoint.sh` for details. ```bash docker-compose up @@ -418,7 +432,16 @@ See `formats/PlaintextFormat.php` for an example. These commands require that you have installed the dev dependencies in `composer.json`. 
+Run all tests: + ./vendor/bin/phpunit + +Run a single test class: + + ./vendor/bin/phpunit --filter UrlTest + +Run linter: + ./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./ https://github.com/squizlabs/PHP_CodeSniffer/wiki diff --git a/actions/ConnectivityAction.php b/actions/ConnectivityAction.php index 09d9c6c68ed..27c388f4b94 100644 --- a/actions/ConnectivityAction.php +++ b/actions/ConnectivityAction.php @@ -19,19 +19,19 @@ public function __construct() $this->bridgeFactory = new BridgeFactory(); } - public function execute(Request $request) + public function __invoke(Request $request): Response { if (!Debug::isEnabled()) { - return new Response('This action is only available in debug mode!', 403); + return new Response(xlat('errors:actions:display:debug_required'), 403); } $bridgeName = $request->get('bridge'); if (!$bridgeName) { - return render_template('connectivity.html.php'); + return new Response(render_template('connectivity.html.php')); } $bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName); if (!$bridgeClassName) { - return new Response('Bridge not found', 404); + return new Response(xlat('errors:general:not_found'), 404); } return $this->reportBridgeConnectivity($bridgeClassName); } @@ -39,7 +39,7 @@ public function execute(Request $request) private function reportBridgeConnectivity($bridgeClassName) { if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { - throw new \Exception('Bridge is not whitelisted!'); + throw new \Exception(xlat('errors:general:whitelist')); } $bridge = $this->bridgeFactory->create($bridgeClassName); @@ -54,8 +54,8 @@ private function reportBridgeConnectivity($bridgeClassName) ]; try { $response = getContents($bridge::URI, [], $curl_opts, true); - $result['http_code'] = $response['code']; - if (in_array($response['code'], [200])) { + $result['http_code'] = $response->getCode(); + if (in_array($result['http_code'], [200])) { $result['successful'] = true; } } catch 
(\Exception $e) { diff --git a/actions/DetectAction.php b/actions/DetectAction.php index 0c61f1b60d1..d9677e733bf 100644 --- a/actions/DetectAction.php +++ b/actions/DetectAction.php @@ -2,16 +2,16 @@ class DetectAction implements ActionInterface { - public function execute(Request $request) + public function __invoke(Request $request): Response { $url = $request->get('url'); $format = $request->get('format'); if (!$url) { - return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url'])); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => xlat('errors:general:specify_url')])); } if (!$format) { - return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format'])); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => xlat('errors:general:specify_format')])); } $bridgeFactory = new BridgeFactory(); @@ -39,7 +39,7 @@ public function execute(Request $request) } return new Response(render(__DIR__ . '/../templates/error.html.php', [ - 'message' => 'No bridge found for given URL: ' . $url, + 'message' => xlat('errors:general:not_found_for_url') . ': ' . $url, ])); } } diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 93813004f22..cb8b5350353 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -11,7 +11,7 @@ public function __construct() $this->logger = RssBridge::getLogger(); } - public function execute(Request $request) + public function __invoke(Request $request): Response { $bridgeName = $request->get('bridge'); $format = $request->get('format'); @@ -32,23 +32,23 @@ public function execute(Request $request) return new Response('', 304, ['last-modified' => $modificationTimeGMT . 
'GMT']); } } - return $cachedResponse->withHeader('rss-bridge', 'This is a cached response'); + return $cachedResponse->withHeader('rss-bridge', xlat('errors:actions:display:cached')); } if (!$bridgeName) { - return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge parameter']), 400); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => xlat('errors:general:missing_parameter')]), 400); } $bridgeFactory = new BridgeFactory(); $bridgeClassName = $bridgeFactory->createBridgeClassName($bridgeName); if (!$bridgeClassName) { - return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => xlat('errors:general:not_found')]), 404); } if (!$format) { - return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']), 400); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => xlat('errors:general:format')]), 400); } if (!$bridgeFactory->isEnabled($bridgeClassName)) { - return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => xlat('errors:general:whitelist')]), 400); } if ( @@ -145,7 +145,7 @@ private function createResponse(Request $request, BridgeAbstract $bridge, string if ($errorCount >= $reportLimit) { if ($errorOutput === 'feed') { // Render the exception as a feed item - $items[] = $this->createFeedItemFromException($e, $bridge); + $items = [$this->createFeedItemFromException($e, $bridge)]; } elseif ($errorOutput === 'http') { return new Response(render(__DIR__ . 
'/../templates/exception.html.php', ['e' => $e]), 500); } elseif ($errorOutput === 'none') { @@ -174,7 +174,7 @@ private function createFeedItemFromException($e, BridgeAbstract $bridge): FeedIt // Create a unique identifier every 24 hours $uniqueIdentifier = urlencode((int)(time() / 86400)); - $title = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier); + $title = sprintf('%s %s! (%s)', xlat('errors:actions:display:error'), $e->getCode(), $uniqueIdentifier); $item->setTitle($title); $item->setURI(get_current_url()); $item->setTimestamp(time()); @@ -213,22 +213,33 @@ private function logBridgeError($bridgeName, $code) return $report['count']; } - private static function createGithubIssueUrl($bridge, $e, string $message): string + private static function createGithubIssueUrl(BridgeAbstract $bridge, \Exception $e, string $message): string { - return sprintf('https://github.com/RSS-Bridge/rss-bridge/issues/new?%s', http_build_query([ - 'title' => sprintf('%s failed with error %s', $bridge->getName(), $e->getCode()), + $maintainer = $bridge->getMaintainer(); + if (str_contains($maintainer, ',')) { + $maintainers = explode(',', $maintainer); + } else { + $maintainers = [$maintainer]; + } + $maintainers = array_map('trim', $maintainers); + + $query = [ + 'title' => $bridge->getName() . ' failed with: ' . $e->getMessage(), 'body' => sprintf( - "```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```", + "```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```\nMaintainer: @%s", $message, implode("\n", trace_to_call_points(trace_from_exception($e))), $_SERVER['QUERY_STRING'] ?? '', Configuration::getVersion(), PHP_OS_FAMILY, - phpversion() ?: 'Unknown' + phpversion() ?: 'Unknown', + implode(', @', $maintainers), ), 'labels' => 'Bridge-Broken', - 'assignee' => $bridge->getMaintainer(), - ])); + 'assignee' => $maintainers[0], + ]; + + return 'https://github.com/RSS-Bridge/rss-bridge/issues/new?' . 
http_build_query($query); } private static function createGithubSearchUrl($bridge): string diff --git a/actions/FindfeedAction.php b/actions/FindfeedAction.php index 94dc6b72b58..228830ab205 100644 --- a/actions/FindfeedAction.php +++ b/actions/FindfeedAction.php @@ -7,16 +7,16 @@ */ class FindfeedAction implements ActionInterface { - public function execute(Request $request) + public function __invoke(Request $request): Response { $url = $request->get('url'); $format = $request->get('format'); if (!$url) { - return new Response('You must specify a url', 400); + return new Response(xlat('errors:general:specify_url'), 400); } if (!$format) { - return new Response('You must specify a format', 400); + return new Response(xlat('errors:general:specify_format'), 400); } $bridgeFactory = new BridgeFactory(); @@ -69,7 +69,7 @@ public function execute(Request $request) $results[] = $content; } if ($results === []) { - return new Response(Json::encode(['message' => 'No bridge found for given url']), 404, ['content-type' => 'application/json']); + return new Response(Json::encode(['message' => xlat('errors:general:not_found_for_url')]), 404, ['content-type' => 'application/json']); } return new Response(Json::encode($results), 200, ['content-type' => 'application/json']); } @@ -82,7 +82,7 @@ private function getParameterName($bridge, $context, $key) } else if (isset($bridge::PARAMETERS['global'][$key]['name'])) { $name = $bridge::PARAMETERS['global'][$key]['name']; } else { - $name = 'Variable "' . $key . 
'" (No name provided)'; + $name = xlat('errors:actions:findfeed:no_name_var', $key); } return $name; } diff --git a/actions/FrontpageAction.php b/actions/FrontpageAction.php index 32795c45de0..a7ed9a09dbe 100644 --- a/actions/FrontpageAction.php +++ b/actions/FrontpageAction.php @@ -2,7 +2,7 @@ final class FrontpageAction implements ActionInterface { - public function execute(Request $request) + public function __invoke(Request $request): Response { $messages = []; $activeBridges = 0; @@ -12,8 +12,8 @@ public function execute(Request $request) foreach ($bridgeFactory->getMissingEnabledBridges() as $missingEnabledBridge) { $messages[] = [ - 'body' => sprintf('Warning : Bridge "%s" not found', $missingEnabledBridge), - 'level' => 'warning' + 'body' => xlat('errors:general:not_found_named', $missingEnabledBridge), + 'level' => 'warning', ]; } @@ -26,13 +26,13 @@ public function execute(Request $request) } // todo: cache this renderered template? - return render(__DIR__ . '/../templates/frontpage.html.php', [ + return new Response(render(__DIR__ . 
'/../templates/frontpage.html.php', [ 'messages' => $messages, 'admin_email' => Configuration::getConfig('admin', 'email'), 'admin_telegram' => Configuration::getConfig('admin', 'telegram'), 'bridges' => $body, 'active_bridges' => $activeBridges, 'total_bridges' => count($bridgeClassNames), - ]); + ])); } } diff --git a/actions/HealthAction.php b/actions/HealthAction.php index a38879c2885..e9c23bfe647 100644 --- a/actions/HealthAction.php +++ b/actions/HealthAction.php @@ -4,11 +4,11 @@ class HealthAction implements ActionInterface { - public function execute(Request $request) + public function __invoke(Request $request): Response { $response = [ 'code' => 200, - 'message' => 'all is good', + 'message' => xlat('misc:all_is_good'), ]; return new Response(Json::encode($response), 200, ['content-type' => 'application/json']); } diff --git a/actions/ListAction.php b/actions/ListAction.php index 3d9cdd738f4..69e03ed2652 100644 --- a/actions/ListAction.php +++ b/actions/ListAction.php @@ -2,7 +2,7 @@ class ListAction implements ActionInterface { - public function execute(Request $request) + public function __invoke(Request $request): Response { $list = new \stdClass(); $list->bridges = []; @@ -14,7 +14,9 @@ public function execute(Request $request) $bridge = $bridgeFactory->create($bridgeClassName); $list->bridges[$bridgeClassName] = [ - 'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive', + 'status' => $bridgeFactory->isEnabled($bridgeClassName) + ? xlat('misc:active') + : xlat('misc:inactive'), 'uri' => $bridge->getURI(), 'donationUri' => $bridge->getDonationURI(), 'name' => $bridge->getName(), diff --git a/bin/cache-clear b/bin/cache-clear index 3563abadc1a..635f41d59c5 100755 --- a/bin/cache-clear +++ b/bin/cache-clear @@ -7,8 +7,21 @@ require __DIR__ . '/../lib/bootstrap.php'; -$rssBridge = new RssBridge(); +$config = []; +if (file_exists(__DIR__ . '/../config.ini.php')) { + $config = parse_ini_file(__DIR__ . 
'/../config.ini.php', true, INI_SCANNER_TYPED); + if (!$config) { + http_response_code(500); + exit("Error parsing config.ini.php\n"); + } +} +Configuration::loadConfiguration($config, getenv()); -$cache = RssBridge::getCache(); +$logger = new SimpleLogger('rssbridge'); + +$logger->addHandler(new StreamHandler('php://stderr', Logger::INFO)); + +$cacheFactory = new CacheFactory($logger); +$cache = $cacheFactory->create(); $cache->clear(); diff --git a/bin/cache-prune b/bin/cache-prune index 7b7a603130d..281c019df2b 100755 --- a/bin/cache-prune +++ b/bin/cache-prune @@ -7,8 +7,21 @@ require __DIR__ . '/../lib/bootstrap.php'; -$rssBridge = new RssBridge(); +$config = []; +if (file_exists(__DIR__ . '/../config.ini.php')) { + $config = parse_ini_file(__DIR__ . '/../config.ini.php', true, INI_SCANNER_TYPED); + if (!$config) { + http_response_code(500); + exit("Error parsing config.ini.php\n"); + } +} +Configuration::loadConfiguration($config, getenv()); -$cache = RssBridge::getCache(); +$logger = new SimpleLogger('rssbridge'); + +$logger->addHandler(new StreamHandler('php://stderr', Logger::INFO)); + +$cacheFactory = new CacheFactory($logger); +$cache = $cacheFactory->create(); $cache->prune(); diff --git a/bridges/ARDMediathekBridge.php b/bridges/ARDMediathekBridge.php index 6de8dad7c75..da11dd642ae 100644 --- a/bridges/ARDMediathekBridge.php +++ b/bridges/ARDMediathekBridge.php @@ -40,6 +40,11 @@ class ARDMediathekBridge extends BridgeAbstract * @const IMAGEWIDTHPLACEHOLDER */ const IMAGEWIDTHPLACEHOLDER = '{width}'; + /** + * Title of the current show + * @var string + */ + private $title; const PARAMETERS = [ [ @@ -72,7 +77,7 @@ public function collectData() } } - $url = self::APIENDPOINT . $showID . '/?pageSize=' . self::PAGESIZE; + $url = self::APIENDPOINT . $showID . '?pageSize=' . 
self::PAGESIZE; $rawJSON = getContents($url); $processedJSON = json_decode($rawJSON); @@ -93,6 +98,17 @@ public function collectData() $this->items[] = $item; } + $this->title = $processedJSON->title; + date_default_timezone_set($oldTz); } + + /** {@inheritdoc} */ + public function getName() + { + if (!empty($this->title)) { + return $this->title; + } + return parent::getName(); + } } diff --git a/bridges/AnfrBridge.php b/bridges/AnfrBridge.php new file mode 100644 index 00000000000..391fde77298 --- /dev/null +++ b/bridges/AnfrBridge.php @@ -0,0 +1,278 @@ + [ + 'departement' => [ + 'name' => 'Département', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Ain' => '001', + 'Aisne' => '002', + 'Allier' => '003', + 'Alpes-de-Haute-Provence' => '004', + 'Hautes-Alpes' => '005', + 'Alpes-Maritimes' => '006', + 'Ardèche' => '007', + 'Ardennes' => '008', + 'Ariège' => '009', + 'Aube' => '010', + 'Aude' => '011', + 'Aveyron' => '012', + 'Bouches-du-Rhône' => '013', + 'Calvados' => '014', + 'Cantal' => '015', + 'Charente' => '016', + 'Charente-Maritime' => '017', + 'Cher' => '018', + 'Corrèze' => '019', + 'Corse-du-Sud' => '02A', + 'Haute-Corse' => '02B', + 'Côte-d\'Or' => '021', + 'Côtes-d\'Armor' => '022', + 'Creuse' => '023', + 'Dordogne' => '024', + 'Doubs' => '025', + 'Drôme' => '026', + 'Eure' => '027', + 'Eure-et-Loir' => '028', + 'Finistère' => '029', + 'Gard' => '030', + 'Haute-Garonne' => '031', + 'Gers' => '032', + 'Gironde' => '033', + 'Hérault' => '034', + 'Ille-et-Vilaine' => '035', + 'Indre' => '036', + 'Indre-et-Loire' => '037', + 'Isère' => '038', + 'Jura' => '039', + 'Landes' => '040', + 'Loir-et-Cher' => '041', + 'Loire' => '042', + 'Haute-Loire' => '043', + 'Loire-Atlantique' => '044', + 'Loiret' => '045', + 'Lot' => '046', + 'Lot-et-Garonne' => '047', + 'Lozère' => '048', + 'Maine-et-Loire' => '049', + 'Manche' => '050', + 'Marne' => '051', + 'Haute-Marne' => '052', + 'Mayenne' => '053', + 'Meurthe-et-Moselle' => '054', + 'Meuse' => '055', + 
'Morbihan' => '056', + 'Moselle' => '057', + 'Nièvre' => '058', + 'Nord' => '059', + 'Oise' => '060', + 'Orne' => '061', + 'Pas-de-Calais' => '062', + 'Puy-de-Dôme' => '063', + 'Pyrénées-Atlantiques' => '064', + 'Hautes-Pyrénées' => '065', + 'Pyrénées-Orientales' => '066', + 'Bas-Rhin' => '067', + 'Haut-Rhin' => '068', + 'Rhône' => '069', + 'Haute-Saône' => '070', + 'Saône-et-Loire' => '071', + 'Sarthe' => '072', + 'Savoie' => '073', + 'Haute-Savoie' => '074', + 'Paris' => '075', + 'Seine-Maritime' => '076', + 'Seine-et-Marne' => '077', + 'Yvelines' => '078', + 'Deux-Sèvres' => '079', + 'Somme' => '080', + 'Tarn' => '081', + 'Tarn-et-Garonne' => '082', + 'Var' => '083', + 'Vaucluse' => '084', + 'Vendée' => '085', + 'Vienne' => '086', + 'Haute-Vienne' => '087', + 'Vosges' => '088', + 'Yonne' => '089', + 'Territoire de Belfort' => '090', + 'Essonne' => '091', + 'Hauts-de-Seine' => '092', + 'Seine-Saint-Denis' => '093', + 'Val-de-Marne' => '094', + 'Val-d\'Oise' => '095', + 'Guadeloupe' => '971', + 'Martinique' => '972', + 'Guyane' => '973', + 'La Réunion' => '974', + 'Saint-Pierre-et-Miquelon' => '975', + 'Mayotte' => '976', + 'Saint-Barthélemy' => '977', + 'Saint-Martin' => '978', + 'Terres australes et antarctiques françaises' => '984', + 'Wallis-et-Futuna' => '986', + 'Polynésie française' => '987', + 'Nouvelle-Calédonie' => '988', + 'Île de Clipperton' => '989' + ] + ], + 'generation' => [ + 'name' => 'Génération', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + '2G' => '2G', + '3G' => '3G', + '4G' => '4G', + '5G' => '5G', + ] + ], + 'operateur' => [ + 'name' => 'Opérateur', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Bouygues Télécom' => 'BOUYGUES TELECOM', + 'Dauphin Télécom' => 'DAUPHIN TELECOM', + 'Digiciel' => 'DIGICEL', + 'Free Caraïbes' => 'FREE CARAIBES', + 'Free Mobile' => 'FREE MOBILE', + 'GLOBALTEL' => 'GLOBALTEL', + 'Office des postes et télécommunications de Nouvelle Calédonie' => 'Gouv Nelle Calédonie (OPT)', + 'Maore Mobile' => 
'MAORE MOBILE', + 'ONATi' => 'ONATI', + 'Orange' => 'ORANGE', + 'Outremer Telecom' => 'OUTREMER TELECOM', + 'Vodafone polynésie' => 'PMT/VODAPHONE', + 'SFR' => 'SFR', + 'SPM Télécom' => 'SPM TELECOM', + 'Service des Postes et Télécommunications de Polynésie Française' => 'Gouv Nelle Calédonie (OPT)', + 'SRR' => 'SRR', + 'Station étrangère' => 'Station étrangère', + 'Telco OI' => 'TELCO IO', + 'United Telecommunication Services Caraïbes' => 'UTS Caraibes', + 'Ora Mobile' => 'VITI SAS', + 'Zeop' => 'ZEOP' + ] + ], + 'statut' => [ + 'name' => 'Statut', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'En service' => 'En service', + 'Projet approuvé' => 'Projet approuvé', + 'Techniquement opérationnel' => 'Techniquement opérationnel', + ] + ] + ] + ]; + + public function collectData() + { + $urlParts = [ + 'id' => 'observatoire_2g_3g_4g', + 'resource_id' => '88ef0887-6b0f-4d3f-8545-6d64c8f597da', + 'fields' => 'id,adm_lb_nom,sta_nm_dpt,emr_lb_systeme,generation,date_maj,sta_nm_anfr,adr_lb_lieu,adr_lb_add1,adr_lb_add2,adr_lb_add3,adr_nm_cp,statut', + 'rows' => 10000 + ]; + + if (!empty($this->getInput('departement'))) { + $urlParts['refine.sta_nm_dpt'] = urlencode($this->getInput('departement')); + } + + if (!empty($this->getInput('generation'))) { + $urlParts['refine.generation'] = $this->getInput('generation'); + } + + if (!empty($this->getInput('operateur'))) { + // http_build_query() already does urlencoding so this call is redundant + $urlParts['refine.adm_lb_nom'] = urlencode($this->getInput('operateur')); + } + + if (!empty($this->getInput('statut'))) { + $urlParts['refine.statut'] = urlencode($this->getInput('statut')); + } + + // API seems to not play well with urlencoded data + $url = urljoin(static::URI, '/d4c/api/records/1.0/download/?' . 
urldecode(http_build_query($urlParts))); + + $json = getContents($url); + $data = Json::decode($json, false); + $records = $data->records; + $frequenciesByStation = []; + foreach ($records as $record) { + if (!isset($frequenciesByStation[$record->fields->sta_nm_anfr])) { + $street = sprintf( + '%s %s %s', + $record->fields->adr_lb_add1 ?? '', + $record->fields->adr_lb_add2 ?? '', + $record->fields->adr_lb_add3 ?? '' + ); + $frequenciesByStation[$record->fields->sta_nm_anfr] = [ + 'id' => $record->fields->sta_nm_anfr, + 'operator' => $record->fields->adm_lb_nom, + 'frequencies' => [], + 'lastUpdate' => 0, + 'address' => [ + 'street' => trim($street), + 'postCode' => $record->fields->adr_nm_cp, + 'city' => $record->fields->adr_lb_lieu + ] + ]; + } + + $frequenciesByStation[$record->fields->sta_nm_anfr]['frequencies'][] = [ + 'generation' => $record->fields->generation, + 'frequency' => $record->fields->emr_lb_systeme, + 'status' => $record->fields->statut, + 'updatedAt' => strtotime($record->fields->date_maj), + ]; + + $frequenciesByStation[$record->fields->sta_nm_anfr]['lastUpdate'] = max( + $frequenciesByStation[$record->fields->sta_nm_anfr]['lastUpdate'], + strtotime($record->fields->date_maj) + ); + } + + usort($frequenciesByStation, static fn ($a, $b) => $b['lastUpdate'] <=> $a['lastUpdate']); + + foreach ($frequenciesByStation as $station) { + $title = sprintf( + '[%s] Mise à jour de la station n°%s à %s (%s)', + $station['operator'], + $station['id'], + $station['address']['city'], + $station['address']['postCode'] + ); + + $array_reduce = array_reduce($station['frequencies'], static function ($carry, $frequency) { + return sprintf('%s
  • %s : %s
  • ', $carry, $frequency['frequency'], $frequency['status']); + }, ''); + + $content = sprintf( + '

    Adresse complète

    %s
    %s
    %s

    Fréquences

    ', + $station['address']['street'], + $station['address']['postCode'], + $station['address']['city'], + $array_reduce + ); + + $this->items[] = [ + 'uid' => $station['id'], + 'timestamp' => $station['lastUpdate'], + 'title' => $title, + 'content' => $content, + ]; + } + } +} \ No newline at end of file diff --git a/bridges/AnisearchBridge.php b/bridges/AnisearchBridge.php index d5aad1c9352..c805cfcb3db 100644 --- a/bridges/AnisearchBridge.php +++ b/bridges/AnisearchBridge.php @@ -68,10 +68,11 @@ public function collectData() if (isset($trailerlink)) { $trailersite = getSimpleHTMLDOM($baseurl . $trailerlink->href); $trailer = $trailersite->find('div#player > iframe', 0); + $trailer = $trailer->{'data-xsrc'}; $ytlink = <<'; + referrerpolicy="strict-origin-when-cross-origin" allowfullscreen> EOT; } } diff --git a/bridges/BadDragonBridge.php b/bridges/BadDragonBridge.php index d38e34083b0..2249d6f7555 100644 --- a/bridges/BadDragonBridge.php +++ b/bridges/BadDragonBridge.php @@ -284,8 +284,7 @@ public function collectData() case 'Clearance': $toyData = json_decode(getContents($this->inputToURL(true))); - $productList = json_decode(getContents(self::URI - . 'api/inventory-toy/product-list')); + $productList = json_decode(getContents(self::URI . 'api/inventory-toy/product-list')); foreach ($toyData->toys as $toy) { $item = []; diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index a9bd2ea13bb..80bb7fd07dc 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -111,12 +111,12 @@ public function collectData() $url = self::URI . 'api/hub/1/dig_deeper'; $data = $this->buildRequestJson(); $header = [ - 'Content-Type: application/json', - 'Content-Length: ' . strlen($data) + 'Content-Type: application/json', + 'Content-Length: ' . 
strlen($data), ]; $opts = [ - CURLOPT_CUSTOMREQUEST => 'POST', - CURLOPT_POSTFIELDS => $data + CURLOPT_CUSTOMREQUEST => 'POST', + CURLOPT_POSTFIELDS => $data, ]; $content = getContents($url, $header, $opts); @@ -314,7 +314,8 @@ private function apiGet($endpoint, $query_data) { $url = self::URI . 'api/' . $endpoint . '?' . http_build_query($query_data); // todo: 429 Too Many Requests happens a lot - $data = json_decode(getContents($url)); + $response = getContents($url); + $data = json_decode($response); return $data; } diff --git a/bridges/BodaccBridge.php b/bridges/BodaccBridge.php new file mode 100644 index 00000000000..38e5856a0f2 --- /dev/null +++ b/bridges/BodaccBridge.php @@ -0,0 +1,218 @@ + [ + 'departement' => [ + 'name' => 'Département', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Ain' => '01', + 'Aisne' => '02', + 'Allier' => '03', + 'Alpes-de-Haute-Provence' => '04', + 'Hautes-Alpes' => '05', + 'Alpes-Maritimes' => '06', + 'Ardèche' => '07', + 'Ardennes' => '08', + 'Ariège' => '09', + 'Aube' => '10', + 'Aude' => '11', + 'Aveyron' => '12', + 'Bouches-du-Rhône' => '13', + 'Calvados' => '14', + 'Cantal' => '15', + 'Charente' => '16', + 'Charente-Maritime' => '17', + 'Cher' => '18', + 'Corrèze' => '19', + 'Corse-du-Sud' => '2A', + 'Haute-Corse' => '2B', + 'Côte-d\'Or' => '21', + 'Côtes-d\'Armor' => '22', + 'Creuse' => '23', + 'Dordogne' => '24', + 'Doubs' => '25', + 'Drôme' => '26', + 'Eure' => '27', + 'Eure-et-Loir' => '28', + 'Finistère' => '29', + 'Gard' => '30', + 'Haute-Garonne' => '31', + 'Gers' => '32', + 'Gironde' => '33', + 'Hérault' => '34', + 'Ille-et-Vilaine' => '35', + 'Indre' => '36', + 'Indre-et-Loire' => '37', + 'Isère' => '38', + 'Jura' => '39', + 'Landes' => '40', + 'Loir-et-Cher' => '41', + 'Loire' => '42', + 'Haute-Loire' => '43', + 'Loire-Atlantique' => '44', + 'Loiret' => '45', + 'Lot' => '46', + 'Lot-et-Garonne' => '47', + 'Lozère' => '48', + 'Maine-et-Loire' => '49', + 'Manche' => '50', + 'Marne' => '51', + 'Haute-Marne' 
=> '52', + 'Mayenne' => '53', + 'Meurthe-et-Moselle' => '54', + 'Meuse' => '55', + 'Morbihan' => '56', + 'Moselle' => '57', + 'Nièvre' => '58', + 'Nord' => '59', + 'Oise' => '60', + 'Orne' => '61', + 'Pas-de-Calais' => '62', + 'Puy-de-Dôme' => '63', + 'Pyrénées-Atlantiques' => '64', + 'Hautes-Pyrénées' => '65', + 'Pyrénées-Orientales' => '66', + 'Bas-Rhin' => '67', + 'Haut-Rhin' => '68', + 'Rhône' => '69', + 'Haute-Saône' => '70', + 'Saône-et-Loire' => '71', + 'Sarthe' => '72', + 'Savoie' => '73', + 'Haute-Savoie' => '74', + 'Paris' => '75', + 'Seine-Maritime' => '76', + 'Seine-et-Marne' => '77', + 'Yvelines' => '78', + 'Deux-Sèvres' => '79', + 'Somme' => '80', + 'Tarn' => '81', + 'Tarn-et-Garonne' => '82', + 'Var' => '83', + 'Vaucluse' => '84', + 'Vendée' => '85', + 'Vienne' => '86', + 'Haute-Vienne' => '87', + 'Vosges' => '88', + 'Yonne' => '89', + 'Territoire de Belfort' => '90', + 'Essonne' => '91', + 'Hauts-de-Seine' => '92', + 'Seine-Saint-Denis' => '93', + 'Val-de-Marne' => '94', + 'Val-d\'Oise' => '95', + 'Guadeloupe' => '971', + 'Martinique' => '972', + 'Guyane' => '973', + 'La Réunion' => '974', + 'Saint-Pierre-et-Miquelon' => '975', + 'Mayotte' => '976', + 'Saint-Barthélemy' => '977', + 'Saint-Martin' => '978', + 'Terres australes et antarctiques françaises' => '984', + 'Wallis-et-Futuna' => '986', + 'Polynésie française' => '987', + 'Nouvelle-Calédonie' => '988', + 'Île de Clipperton' => '989' + ] + ], + 'famille' => [ + 'name' => 'Famille', + 'type' => 'list', + 'values' => [ + 'Toutes' => null, + 'Annonces diverses' => 'divers', + 'Créations' => 'creation', + 'Dépôts des comptes' => 'dpc', + 'Immatriculations' => 'immatriculation', + 'Modifications diverses' => 'modification', + 'Procédures collectives' => 'collective', + 'Procédures de conciliation' => 'conciliation', + 'Procédures de rétablissement professionnel' => 'retablissement_professionnel', + 'Radiations' => 'radiation', + 'Ventes et cessions' => 'vente' + ] + ], + 'type' => [ + 'name' => 
'Type', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Avis initial' => 'annonce', + 'Avis d\'annulation' => 'annulation', + 'Avis rectificatif' => 'rectificatif' + ] + ] + ] + ]; + + public function collectData() + { + $parameters = [ + 'select' => 'id,dateparution,typeavis_lib,familleavis_lib,commercant,ville,cp', + 'order_by' => 'id desc', + 'limit' => 50, + ]; + + $where = []; + if (!empty($this->getInput('departement'))) { + $where[] = 'numerodepartement="' . $this->getInput('departement') . '"'; + } + + if (!empty($this->getInput('famille'))) { + $where[] = 'familleavis="' . $this->getInput('famille') . '"'; + } + + if (!empty($this->getInput('type'))) { + $where[] = 'typeavis="' . $this->getInput('type') . '"'; + } + + if ($where !== []) { + $parameters['where'] = implode(' and ', $where); + } + + $url = urljoin(self::URI, '/api/explore/v2.1/catalog/datasets/annonces-commerciales/records?' . http_build_query($parameters)); + + $data = Json::decode(getContents($url), false); + + foreach ($data->results as $result) { + if ( + !isset( + $result->id, + $result->dateparution, + $result->typeavis_lib, + $result->familleavis_lib, + $result->commercant, + $result->ville, + $result->cp + ) + ) { + continue; + } + + $title = sprintf( + '[%s] %s - %s à %s (%s)', + $result->typeavis_lib, + $result->familleavis_lib, + $result->commercant, + $result->ville, + $result->cp + ); + + $this->items[] = [ + 'uid' => $result->id, + 'timestamp' => strtotime($result->dateparution), + 'title' => $title, + ]; + } + } +} diff --git a/bridges/CentreFranceBridge.php b/bridges/CentreFranceBridge.php new file mode 100644 index 00000000000..a6dea227651 --- /dev/null +++ b/bridges/CentreFranceBridge.php @@ -0,0 +1,279 @@ + [ + 'newspaper' => [ + 'name' => 'Newspaper', + 'type' => 'list', + 'values' => [ + 'La Montagne' => 'lamontagne.fr', + 'Le Populaire du Centre' => 'lepopulaire.fr', + 'La République du Centre' => 'larep.fr', + 'Le Berry Républicain' => 'leberry.fr', + 'L\'Yonne 
Républicaine' => 'lyonne.fr', + 'L\'Écho Républicain' => 'lechorepublicain.fr', + 'Le Journal du Centre' => 'lejdc.fr', + 'L\'Éveil de la Haute-Loire' => 'leveil.fr', + 'Le Pays' => 'le-pays.fr' + ] + ], + 'remove-reserved-for-subscribers-articles' => [ + 'name' => 'Remove reserved for subscribers articles', + 'type' => 'checkbox', + 'title' => 'Filter out articles that are only available to subscribers' + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'How many articles to fetch. 0 to disable.', + 'required' => true, + 'defaultValue' => 15 + ] + ], + 'Local news' => [ + 'locality-slug' => [ + 'name' => 'Locality slug', + 'type' => 'text', + 'required' => false, + 'title' => 'Fetch articles for a specific locality. If not set, headlines from the front page will be used instead.', + 'exampleValue' => 'moulins-03000' + ], + ] + ]; + + public function collectData() + { + $value = $this->getInput('limit'); + if (is_numeric($value) && (int)$value >= 0) { + $limit = $value; + } else { + $limit = static::PARAMETERS['global']['limit']['defaultValue']; + } + + if (empty($this->getInput('newspaper'))) { + return; + } + + $localitySlug = $this->getInput('locality-slug') ?? ''; + $alreadyFoundArticlesURIs = []; + + $newspaperUrl = 'https://www.' . $this->getInput('newspaper') . '/' . $localitySlug . 
'/'; + $html = getSimpleHTMLDOM($newspaperUrl); + + // Articles are detected through their titles + foreach ($html->find('.c-titre') as $articleTitleDOMElement) { + $articleLinkDOMElement = $articleTitleDOMElement->find('a', 0); + + // Ignore articles in the « Les + partagés » block + if (strpos($articleLinkDOMElement->id, 'les_plus_partages') !== false) { + continue; + } + + $articleURI = $articleLinkDOMElement->href; + + // If the URI has already been processed, ignore it + if (in_array($articleURI, $alreadyFoundArticlesURIs, true)) { + continue; + } + + // If news are filtered for a specific locality, filter out article for other localities + if ($localitySlug !== '' && !str_contains($articleURI, $localitySlug)) { + continue; + } + + $articleTitle = ''; + + // If article is reserved for subscribers + if ($articleLinkDOMElement->find('span.premium-picto', 0)) { + if ($this->getInput('remove-reserved-for-subscribers-articles') === true) { + continue; + } + + $articleTitle .= '🔒 '; + } + + $articleTitleDOMElement = $articleLinkDOMElement->find('span[data-tb-title]', 0); + if ($articleTitleDOMElement === null) { + continue; + } + + if ($limit > 0 && count($this->items) === $limit) { + break; + } + + $articleTitle .= $articleLinkDOMElement->find('span[data-tb-title]', 0)->innertext; + $articleFullURI = urljoin('https://www.' . $this->getInput('newspaper') . 
'/', $articleURI); + + $item = [ + 'title' => $articleTitle, + 'uri' => $articleFullURI, + ...$this->collectArticleData($articleFullURI) + ]; + $this->items[] = $item; + + $alreadyFoundArticlesURIs[] = $articleURI; + } + } + + private function collectArticleData($uri): array + { + $html = getSimpleHTMLDOMCached($uri, 86400 * 90); // 90d + + $item = [ + 'enclosures' => [], + ]; + + $articleInformations = $html->find('.c-article-informations p'); + if (is_array($articleInformations) && $articleInformations !== []) { + $authorPosition = 1; + + // Article publication date + if (preg_match('/(\d{2})\/(\d{2})\/(\d{4})( à (\d{2})h(\d{2}))?/', $articleInformations[0]->innertext, $articleDateParts) > 0) { + $articleDate = new \DateTime('midnight'); + $articleDate->setDate($articleDateParts[3], $articleDateParts[2], $articleDateParts[1]); + + if (count($articleDateParts) === 7) { + $articleDate->setTime($articleDateParts[5], $articleDateParts[6]); + } + + $item['timestamp'] = $articleDate->getTimestamp(); + } + + // Article update date + if (count($articleInformations) >= 2 && preg_match('/(\d{2})\/(\d{2})\/(\d{4})( à (\d{2})h(\d{2}))?/', $articleInformations[1]->innertext, $articleDateParts) > 0) { + $authorPosition = 2; + + $articleDate = new \DateTime('midnight'); + $articleDate->setDate($articleDateParts[3], $articleDateParts[2], $articleDateParts[1]); + + if (count($articleDateParts) === 7) { + $articleDate->setTime($articleDateParts[5], $articleDateParts[6]); + } + + $item['timestamp'] = $articleDate->getTimestamp(); + } + + if (count($articleInformations) === ($authorPosition + 1)) { + $item['author'] = $articleInformations[$authorPosition]->innertext; + } + } + + $articleContent = $html->find('.b-article .contenu > *'); + if (is_array($articleContent)) { + $item['content'] = ''; + + foreach ($articleContent as $contentPart) { + if (in_array($contentPart->getAttribute('id'), ['cf-audio-player', 'poool-widget'], true)) { + continue; + } + + $articleHiddenParts = 
$contentPart->find('.bloc, .p402_hide'); + if (is_array($articleHiddenParts)) { + foreach ($articleHiddenParts as $articleHiddenPart) { + $contentPart->removeChild($articleHiddenPart); + } + } + + $item['content'] .= $contentPart->innertext; + } + } + + $articleIllustration = $html->find('.photo-wrapper .photo-box img'); + if (is_array($articleIllustration) && count($articleIllustration) === 1) { + $item['enclosures'][] = $articleIllustration[0]->getAttribute('src'); + } + + $articleAudio = $html->find('#cf-audio-player-container audio'); + if (is_array($articleAudio) && count($articleAudio) === 1) { + $item['enclosures'][] = $articleAudio[0]->getAttribute('src'); + } + + $articleTags = $html->find('.b-article > ul.c-tags > li > a.t-simple'); + if (is_array($articleTags)) { + $item['categories'] = array_map(static fn ($articleTag) => $articleTag->innertext, $articleTags); + } + + $explode = explode('_', $uri); + $array_reverse = array_reverse($explode); + $string = $array_reverse[0]; + $uid = rtrim($string, '/'); + if (is_numeric($uid)) { + $item['uid'] = $uid; + } + + // If the article is a "grand format", we use another parsing strategy + if ($item['content'] === '' && $html->find('article') !== []) { + $articleContent = $html->find('article > section'); + foreach ($articleContent as $contentPart) { + if ($contentPart->find('#journo') !== []) { + $item['author'] = $contentPart->find('#journo')->innertext; + continue; + } + + $item['content'] .= $contentPart->innertext; + } + } + + $item['content'] = str_replace('premium', '🔒', $item['content']); + $item['content'] = trim($item['content']); + + return $item; + } + + public function getName() + { + if (empty($this->getInput('newspaper'))) { + return static::NAME; + } + + $newspaperNameByDomain = array_flip(self::PARAMETERS['global']['newspaper']['values']); + if (!isset($newspaperNameByDomain[$this->getInput('newspaper')])) { + return static::NAME; + } + + $completeTitle = 
$newspaperNameByDomain[$this->getInput('newspaper')]; + + if (!empty($this->getInput('locality-slug'))) { + $localityName = explode('-', $this->getInput('locality-slug')); + array_pop($localityName); + $completeTitle .= ' ' . ucfirst(implode('-', $localityName)); + } + + return $completeTitle; + } + + public function getIcon() + { + if (empty($this->getInput('newspaper'))) { + return static::URI . '/favicon.ico'; + } + + return 'https://www.' . $this->getInput('newspaper') . '/favicon.ico'; + } + + public function detectParameters($url) + { + $regex = '/^(https?:\/\/)?(www\.)?([a-z-]+\.fr)(\/)?([a-z-]+-[0-9]{5})?(\/)?$/'; + $url = strtolower($url); + + if (preg_match($regex, $url, $urlMatches) === 0) { + return null; + } + + if (!in_array($urlMatches[3], self::PARAMETERS['global']['newspaper']['values'], true)) { + return null; + } + + return [ + 'newspaper' => $urlMatches[3], + 'locality-slug' => empty($urlMatches[5]) ? null : $urlMatches[5] + ]; + } +} diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php index de5544ecf91..a9a71886c17 100644 --- a/bridges/CryptomeBridge.php +++ b/bridges/CryptomeBridge.php @@ -6,13 +6,17 @@ class CryptomeBridge extends BridgeAbstract const NAME = 'Cryptome'; const URI = 'https://cryptome.org/'; const CACHE_TIMEOUT = 21600; // 6h - const DESCRIPTION = 'Returns the N most recent documents.'; - const PARAMETERS = [ [ + const DESCRIPTION = [ + 'en-US' => 'Returns the N most recent documents.', + ]; + const PARAMETERS = [[ 'n' => [ - 'name' => 'number of elements', + 'name' => [ + 'en-US' => 'number of elements', + ], 'type' => 'number', 'required' => true, - 'exampleValue' => 10 + 'exampleValue' => 10, ] ]]; diff --git a/bridges/CubariBridge.php b/bridges/CubariBridge.php index a7b6d69def2..72fadf6ec6c 100644 --- a/bridges/CubariBridge.php +++ b/bridges/CubariBridge.php @@ -47,8 +47,10 @@ public function getURI() */ public function collectData() { + // TODO: fix trivial SSRF $json = 
getContents($this->getInput('gist')); - $jsonFile = json_decode($json, true); + + $jsonFile = Json::decode($json); $this->mangaTitle = $jsonFile['title']; diff --git a/bridges/DemosBerlinBridge.php b/bridges/DemosBerlinBridge.php index 05fd2335d45..cc44a7cf504 100644 --- a/bridges/DemosBerlinBridge.php +++ b/bridges/DemosBerlinBridge.php @@ -24,7 +24,8 @@ public function getIcon() public function collectData() { - $json = getContents('https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json'); + $url = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json'; + $json = getContents($url); $jsonFile = json_decode($json, true); $daysInterval = DateInterval::createFromDateString($this->getInput('days') . ' day'); diff --git a/bridges/DerpibooruBridge.php b/bridges/DerpibooruBridge.php index e06e0eff25d..2d650d57e46 100644 --- a/bridges/DerpibooruBridge.php +++ b/bridges/DerpibooruBridge.php @@ -78,13 +78,9 @@ public function getURI() public function collectData() { - $queryJson = json_decode(getContents( - self::URI - . 'api/v1/json/search/images?filter_id=' - . urlencode($this->getInput('f')) - . '&q=' - . urlencode($this->getInput('q')) - )); + $url = self::URI . 'api/v1/json/search/images?filter_id=' . urlencode($this->getInput('f')) . '&q=' . 
urlencode($this->getInput('q')); + + $queryJson = json_decode(getContents($url)); foreach ($queryJson->images as $post) { $item = []; diff --git a/bridges/EBayBridge.php b/bridges/EBayBridge.php index 879581645cc..463f73d6321 100644 --- a/bridges/EBayBridge.php +++ b/bridges/EBayBridge.php @@ -5,15 +5,21 @@ class EBayBridge extends BridgeAbstract const NAME = 'eBay'; const DESCRIPTION = 'Returns the search results from the eBay auctioning platforms'; const URI = 'https://www.eBay.com'; - const MAINTAINER = 'wrobelda'; + const MAINTAINER = 'NotsoanoNimus, wrobelda'; const PARAMETERS = [[ 'url' => [ 'name' => 'Search URL', 'title' => 'Copy the URL from your browser\'s address bar after searching for your items and paste it here', - 'pattern' => '^(https:\/\/)?(www\.)?(befr\.|benl\.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk).*$', + 'pattern' => '^(https:\/\/)?(www\.)?(befr\.|benl\.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk)\/.*$', 'exampleValue' => 'https://www.ebay.com/sch/i.html?_nkw=atom+rss', 'required' => true, - ] + ], + 'includesSearchLink' => [ + 'name' => 'Include Original Search Link', + 'title' => 'Whether or not each feed item should include the original search query link to eBay which was used to find the given listing.', + 'type' => 'checkbox', + 'defaultValue' => false, + ], ]]; public function getURI() @@ -23,6 +29,10 @@ public function getURI() $uri = trim(preg_replace('/([?&])_sop=[^&]+(&|$)/', '$1', $this->getInput('url')), '?&/'); $uri .= (parse_url($uri, PHP_URL_QUERY) ? '&' : '?') . '_sop=10'; + // Ensure the List View is used instead of the Gallery View. + $uri = trim(preg_replace('/[?&]_dmd=[^&]+(&|$)/i', '$1', $uri), '?&/'); + $uri .= '&_dmd=1'; + return $uri; } else { return parent::getURI(); @@ -46,7 +56,7 @@ public function getName() }); if ($searchQuery) { - return $searchQuery[0]; + return 'eBay - ' . 
$searchQuery[0]; } return parent::getName(); @@ -61,44 +71,90 @@ public function collectData() $inexactMatches->remove(); } + // Remove "NEW LISTING" labels: we sort by the newest, so this is redundant. + foreach ($html->find('.LIGHT_HIGHLIGHT') as $new_listing_label) { + $new_listing_label->remove(); + } + $results = $html->find('ul.srp-results > li.s-item'); foreach ($results as $listing) { $item = []; - // Remove "NEW LISTING" label, we sort by the newest, so this is redundant - foreach ($listing->find('.LIGHT_HIGHLIGHT') as $new_listing_label) { - $new_listing_label->remove(); - } + // Define a closure to shorten the ugliness of querying the current listing. + $find = function ($query, $altText = '') use ($listing) { + return $listing->find($query, 0)->plaintext ?? $altText; + }; - $listingTitle = $listing->find('.s-item__title', 0); - if ($listingTitle) { - $item['title'] = $listingTitle->plaintext; + $item['title'] = $find('.s-item__title'); + if (!$item['title']) { + // Skip entries where the title cannot be found (for w/e reason). + continue; } - $subtitle = implode('', $listing->find('.s-item__subtitle')); - - $listingUrl = $listing->find('.s-item__link', 0); - if ($listingUrl) { - $item['uri'] = $listingUrl->href; + // It appears there may be more than a single 'subtitle' subclass in the listing. Collate them. + $subtitles = $listing->find('.s-item__subtitle'); + if (is_array($subtitles)) { + $subtitle = trim(implode(' ', array_column($subtitles, 'plaintext'))); } else { - $item['uri'] = null; + $subtitle = trim($subtitles->plaintext ?? ''); } + // Get the listing's link and uid. + $itemUri = $listing->find('.s-item__link', 0); + if ($itemUri) { + $item['uri'] = $itemUri->href; + } if (preg_match('/.*\/itm\/(\d+).*/i', $item['uri'], $matches)) { $item['uid'] = $matches[1]; } - $priceDom = $listing->find('.s-item__details > .s-item__detail > .s-item__price', 0); - $price = $priceDom->plaintext ?? 
'N/A'; + // Price should be fetched on its own so we can provide the alt text without complication. + $price = $find('.s-item__price', '[NO PRICE]'); + + // Map a list of dynamic variable names to their subclasses within the listing. + // This is just a bit of sugar to make this cleaner and more maintainable. + $propertyMappings = [ + 'additionalPrice' => '.s-item__additional-price', + 'discount' => '.s-item__discount', + 'shippingFree' => '.s-item__freeXDays', + 'localDelivery' => '.s-item__localDelivery', + 'logisticsCost' => '.s-item__logisticsCost', + 'location' => '.s-item__location', + 'obo' => '.s-item__formatBestOfferEnabled', + 'sellerInfo' => '.s-item__seller-info-text', + 'bids' => '.s-item__bidCount', + 'timeLeft' => '.s-item__time-left', + 'timeEnd' => '.s-item__time-end', + ]; + + foreach ($propertyMappings as $k => $v) { + $$k = $find($v); + } - $shippingFree = $listing->find('.s-item__details > .s-item__detail > .s-item__freeXDays', 0)->plaintext ?? ''; - $localDelivery = $listing->find('.s-item__details > .s-item__detail > .s-item__localDelivery', 0)->plaintext ?? ''; - $logisticsCost = $listing->find('.s-item__details > .s-item__detail > .s-item__logisticsCost', 0)->plaintext ?? ''; + // When an additional price detail or discount is defined, create the 'discountLine'. + if ($additionalPrice || $discount) { + $discountLine = '
    (' + . trim($additionalPrice ?? '') + . '; ' . trim($discount ?? '') + . ')'; + } else { + $discountLine = ''; + } - $location = $listing->find('.s-item__details > .s-item__detail > .s-item__location', 0)->plaintext ?? ''; + // Prepend the time-left info with a comma if the right details were found. + $timeInfo = trim($timeLeft . ' ' . $timeEnd); + if ($timeInfo) { + $timeInfo = ', ' . $timeInfo; + } - $sellerInfo = $listing->find('.s-item__seller-info-text', 0)->plaintext ?? ''; + // Set the listing type. + if ($bids) { + $listingTypeDetails = "Auction: {$bids}{$timeInfo}"; + } else { + $listingTypeDetails = 'Buy It Now'; + } + // Acquire the listing's primary image and atach it. $image = $listing->find('.s-item__image-wrapper > img', 0); if ($image) { // Not quite sure why append fragment here @@ -106,11 +162,23 @@ public function collectData() $item['enclosures'] = [$imageUrl]; } + // Include the original search link, if specified. + if ($this->getInput('includesSearchLink')) { + $searchLink = '

    View Search

    '; + } else { + $searchLink = ''; + } + + // Build the final item's content to display and add the item onto the list. $item['content'] = <<$sellerInfo $location

    -

    $price $shippingFree $localDelivery $logisticsCost

    -

    $subtitle

    +

    $price $obo ($listingTypeDetails) + $discountLine +
    $shippingFree $localDelivery $logisticsCost

    +

    {$subtitle}

    +$searchLink CONTENT; + $this->items[] = $item; } } diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 25a88124266..556bd39ec38 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -50,7 +50,9 @@ public function collectData() $eztv_uri = $this->getEztvUri(); $ids = explode(',', trim($this->getInput('ids'))); foreach ($ids as $id) { - $data = json_decode(getContents(sprintf('%s/api/get-torrents?imdb_id=%s', $eztv_uri, $id))); + $url = sprintf('%s/api/get-torrents?imdb_id=%s', $eztv_uri, $id); + $json = getContents($url); + $data = json_decode($json); if (!isset($data->torrents)) { // No results continue; diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 70117cb01ae..eaa50ba1d16 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -8,6 +8,12 @@ class EconomistBridge extends FeedExpander const CACHE_TIMEOUT = 3600; //1hour const DESCRIPTION = 'Returns the latest articles for the selected category'; + const CONFIGURATION = [ + 'cookie' => [ + 'required' => false, + ] + ]; + const PARAMETERS = [ 'global' => [ 'limit' => [ @@ -99,8 +105,20 @@ public function collectData() protected function parseItem(array $item) { + $headers = []; + if ($this->getOption('cookie')) { + $headers = [ + 'Authority: www.economist.com', + 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-language: en-US,en;q=0.9', + 'Cache-control: max-age=0', + 'Cookie: ' . 
$this->getOption('cookie'), + 'Upgrade-insecure-requests: 1', + 'User-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36' + ]; + } try { - $dom = getSimpleHTMLDOM($item['uri']); + $dom = getSimpleHTMLDOM($item['uri'], $headers); } catch (Exception $e) { $item['content'] = $e->getMessage(); return $item; @@ -209,6 +227,15 @@ private function processContent($html, $elem) foreach ($elem->find('a.ds-link-with-arrow-icon') as $a) { $a->parent->removeChild($a); } + // Sections like "Leaders on day X" + foreach ($elem->find('div[data-tracking-id=content-well-chapter-list]') as $div) { + $div->parent->removeChild($div); + } + // "Explore more" section + foreach ($elem->find('h3[id=article-tags]') as $h3) { + $div = $h3->parent; + $div->parent->removeChild($div); + } // The Economist puts infographics into iframes, which doesn't // work in any of my readers. So this replaces iframes with diff --git a/bridges/EconomistWorldInBriefBridge.php b/bridges/EconomistWorldInBriefBridge.php index 47782a511f3..4e65b15f2c1 100644 --- a/bridges/EconomistWorldInBriefBridge.php +++ b/bridges/EconomistWorldInBriefBridge.php @@ -9,6 +9,12 @@ class EconomistWorldInBriefBridge extends BridgeAbstract const CACHE_TIMEOUT = 3600; // 1 hour const DESCRIPTION = 'Returns stories from the World in Brief section'; + const CONFIGURATION = [ + 'cookie' => [ + 'required' => false, + ] + ]; + const PARAMETERS = [ '' => [ 'splitGobbets' => [ @@ -41,7 +47,19 @@ class EconomistWorldInBriefBridge extends BridgeAbstract public function collectData() { - $html = getSimpleHTMLDOM(self::URI); + $headers = []; + if ($this->getOption('cookie')) { + $headers = [ + 'Authority: www.economist.com', + 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-language: en-US,en;q=0.9', + 'Cache-control: max-age=0', + 'Cookie: ' . 
$this->getOption('cookie'), + 'Upgrade-insecure-requests: 1', + 'User-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36' + ]; + } + $html = getSimpleHTMLDOM(self::URI, $headers); $gobbets = $html->find('._gobbets', 0); if ($this->getInput('splitGobbets') == 1) { $this->splitGobbets($gobbets); @@ -50,7 +68,9 @@ public function collectData() }; if ($this->getInput('agenda') == 1) { $articles = $html->find('._articles', 0); - $this->collectArticles($articles); + if ($articles != null) { + $this->collectArticles($articles); + } } if ($this->getInput('quote') == 1) { $quote = $html->find('._quote-container', 0); diff --git a/bridges/ElloBridge.php b/bridges/ElloBridge.php index 42c88a06860..a9e69cfe005 100644 --- a/bridges/ElloBridge.php +++ b/bridges/ElloBridge.php @@ -34,11 +34,9 @@ public function collectData() ]; if (!empty($this->getInput('u'))) { - $postData = getContents(self::URI . 'api/v2/users/~' . urlencode($this->getInput('u')) . '/posts', $header) or - returnServerError('Unable to query Ello API.'); + $postData = getContents(self::URI . 'api/v2/users/~' . urlencode($this->getInput('u')) . '/posts', $header); } else { - $postData = getContents(self::URI . 'api/v2/posts?terms=' . urlencode($this->getInput('s')), $header) or - returnServerError('Unable to query Ello API.'); + $postData = getContents(self::URI . 'api/v2/posts?terms=' . urlencode($this->getInput('s')), $header); } $postData = json_decode($postData); @@ -117,7 +115,7 @@ private function getAPIKey() $apiKey = $this->cache->get($cacheKey); if (!$apiKey) { - $keyInfo = getContents(self::URI . 'api/webapp-token') or returnServerError('Unable to get token.'); + $keyInfo = getContents(self::URI . 
'api/webapp-token'); $apiKey = json_decode($keyInfo)->token->access_token; $ttl = 60 * 60 * 20; $this->cache->set($cacheKey, $apiKey, $ttl); diff --git a/bridges/FDroidBridge.php b/bridges/FDroidBridge.php index 8d3b780854c..fdf0262fc97 100644 --- a/bridges/FDroidBridge.php +++ b/bridges/FDroidBridge.php @@ -31,7 +31,7 @@ private function getTimestamp($url) CURLOPT_NOBODY => true, ]; $reponse = getContents($url, [], $curlOptions, true); - $lastModified = $reponse['headers']['last-modified'][0] ?? null; + $lastModified = $reponse->getHeader('last-modified'); $timestamp = strtotime($lastModified ?? 'today'); return $timestamp; } diff --git a/bridges/FunkBridge.php b/bridges/FunkBridge.php index df499035db2..e4935ffb9e8 100644 --- a/bridges/FunkBridge.php +++ b/bridges/FunkBridge.php @@ -32,7 +32,7 @@ public function collectData() $url .= '?size=' . $this->getInput('max'); } - $jsonString = getContents($url) or returnServerError('No contents received!'); + $jsonString = getContents($url); $json = json_decode($jsonString, true); foreach ($json['list'] as $element) { diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index 4aa047992c6..361e3f1d41f 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -31,7 +31,7 @@ public function collectData() $img = $this->findItemImage($newsItem, 'a.news_image'); $time = $this->findItemDate($newsItem); $author = $newsItem->find('a.username', 0)->plaintext; - $title = $this->decodeHtmlEntities($newsItem->find('h3.news_title', 0)->plaintext); + $title = $this->decodeHtmlEntities($newsItem->find('h2.news_title', 0)->plaintext); $content = $this->fetchPostContent($url, self::URI); $this->items[] = $this->buildItem($url, $title, $author, $time, $img, $content); unset($newsItem); // Some items are heavy, freeing the item proactively helps saving memory @@ -41,7 +41,7 @@ public function collectData() foreach ($html->find('li.portal_review') as $reviewItem) { $url = urljoin(self::URI, 
$reviewItem->find('a.review_boxart', 0)->href); $img = $this->findItemImage($reviewItem, 'a.review_boxart'); - $title = $this->decodeHtmlEntities($reviewItem->find('h2.review_title', 0)->plaintext); + $title = $this->decodeHtmlEntities($reviewItem->find('div.review_title', 0)->find('h2', 0)->plaintext); $content = getSimpleHTMLDOMCached($url); $author = $content->find('span.author--name', 0)->plaintext; $time = $this->findItemDate($content); diff --git a/bridges/GelbooruBridge.php b/bridges/GelbooruBridge.php index 5fc6b33c4bc..96d16bf9bde 100644 --- a/bridges/GelbooruBridge.php +++ b/bridges/GelbooruBridge.php @@ -33,7 +33,7 @@ protected function getFullURI() return $this->getURI() . 'index.php?&page=dapi&s=post&q=index&json=1&pid=' . $this->getInput('p') . '&limit=' . $this->getInput('l') - . '&tags=' . urlencode($this->getInput('t')); + . '&tags=' . urlencode($this->getInput('t') ?? ''); } /* @@ -76,18 +76,16 @@ protected function getItemFromElement($element) public function collectData() { - $content = getContents($this->getFullURI()); - // $content is empty string + $url = $this->getFullURI(); + $content = getContents($url); - // Most other Gelbooru-based boorus put their content in the root of - // the JSON. 
This check is here for Bridges that inherit from this one - $posts = json_decode($content); - if (isset($posts->post)) { - $posts = $posts->post; + if ($content === '') { + return; } - if (is_null($posts)) { - returnServerError('No posts found.'); + $posts = Json::decode($content, false); + if (isset($posts->post)) { + $posts = $posts->post; } foreach ($posts as $post) { diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index 0f8e5e9630c..2ce47270f87 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -586,16 +586,18 @@ class GithubTrendingBridge extends BridgeAbstract 'Monthly' => 'monthly', ], 'defaultValue' => 'today' + ], + 'spokenLanguage' => [ + 'name' => 'Spoken Language Code', + 'type' => 'text', + 'exampleValue' => 'en', ] ] - ]; public function collectData() { - $params = ['since' => urlencode($this->getInput('date_range'))]; - $url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params); - + $url = $this->constructUrl(); $html = getSimpleHTMLDOM($url); $this->items = []; @@ -630,4 +632,32 @@ public function getName() return parent::getName(); } + + private function constructUrl() + { + $url = self::URI; + $language = $this->getInput('language'); + $dateRange = $this->getInput('date_range'); + $spokenLanguage = $this->getInput('spokenLanguage'); + + if (!empty($language)) { + $url .= '/' . $language; + } + + $queryParams = []; + + if (!empty($dateRange)) { + $queryParams['since'] = $dateRange; + } + + if (!empty($spokenLanguage)) { + $queryParams['spoken_language_code'] = trim($spokenLanguage); + } + + if (!empty($queryParams)) { + $url .= '?' . 
http_build_query($queryParams); + } + + return $url; + } } diff --git a/bridges/GlowficBridge.php b/bridges/GlowficBridge.php index b51ead8de9f..0e4b8d93704 100644 --- a/bridges/GlowficBridge.php +++ b/bridges/GlowficBridge.php @@ -41,8 +41,7 @@ public function collectData() $first_page = 1; } for ($page_offset = $first_page; $page_offset <= $metadata['Last-Page']; $page_offset++) { - $jsonContents = getContents($url . '/replies?page=' . $page_offset) or - returnClientError('Could not retrieve replies for page ' . $page_offset . '.'); + $jsonContents = getContents($url . '/replies?page=' . $page_offset); $replies = json_decode($jsonContents); foreach ($replies as $reply) { $item = []; @@ -75,8 +74,9 @@ public function getURI() private function getPost() { $url = $this->getAPIURI(); - $jsonPost = getContents($url) or returnClientError('Could not retrieve post metadata.'); + $jsonPost = getContents($url); $post = json_decode($jsonPost); + return $post; } diff --git a/bridges/HardwareInfoBridge.php b/bridges/HardwareInfoBridge.php deleted file mode 100644 index dc32c33a04a..00000000000 --- a/bridges/HardwareInfoBridge.php +++ /dev/null @@ -1,66 +0,0 @@ -collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 10); - } - - protected function parseItem(array $item) - { - $itemUrl = $item['uri']; - $articlePage = getSimpleHTMLDOMCached($itemUrl); - - $article = $articlePage->find('div.article__content', 0); - - //everything under the social bar is not part of the article, remove it - $reachedEndOfArticle = false; - - foreach ($article->find('*') as $child) { - if ( - !$reachedEndOfArticle && isset($child->attr['class']) - && $child->attr['class'] == 'article__content__social-bar' - ) { - $reachedEndOfArticle = true; - } - - if ($reachedEndOfArticle) { - $child->outertext = ''; - } - } - - //get rid of some more elements we don't need - $to_remove_selectors = [ - 'script', - 'div.incontent', - 'div.article__content__social-bar', - 'div#revealNewsTip', - 
'div.article__previous_next' - ]; - - foreach ($to_remove_selectors as $selector) { - foreach ($article->find($selector) as $found) { - $found->outertext = ''; - } - } - - // convert iframes to links. meant for embedded YouTube videos. - foreach ($article->find('iframe') as $found) { - $iframeUrl = $found->getAttribute('src'); - - if ($iframeUrl) { - $found->outertext = '' . $iframeUrl . ''; - } - } - - $item['content'] = $article; - return $item; - } -} diff --git a/bridges/InternationalInstituteForStrategicStudiesBridge.php b/bridges/InternationalInstituteForStrategicStudiesBridge.php index b5b589ab271..9b82dbd5d6e 100644 --- a/bridges/InternationalInstituteForStrategicStudiesBridge.php +++ b/bridges/InternationalInstituteForStrategicStudiesBridge.php @@ -30,7 +30,7 @@ public function collectData() ]; $headers = [ 'Accept: application/json, text/plain, */*', - 'Content-Type: application/json;charset=UTF-8' + 'Content-Type: application/json;charset=UTF-8', ]; $json = getContents($url, $headers, $opts); $data = json_decode($json); diff --git a/bridges/ItakuBridge.php b/bridges/ItakuBridge.php index 4f4145742a4..506805f7ebb 100644 --- a/bridges/ItakuBridge.php +++ b/bridges/ItakuBridge.php @@ -669,11 +669,11 @@ private function getData(string $url, bool $cache = false, bool $getJSON = false if ($cache) { $data = $this->loadCacheValue($url); if (is_null($data)) { - $data = getContents($url, $httpHeaders, $curlOptions) or returnServerError("Could not load $url"); + $data = getContents($url, $httpHeaders, $curlOptions); $this->saveCacheValue($url, $data); } } else { - $data = getContents($url, $httpHeaders, $curlOptions) or returnServerError("Could not load $url"); + $data = getContents($url, $httpHeaders, $curlOptions); } return json_decode($data, true); } else { //get simpleHTMLDOM object diff --git a/bridges/KilledbyGoogleBridge.php b/bridges/KilledbyGoogleBridge.php index 54c5b59f8d7..7b8f7f6e135 100644 --- a/bridges/KilledbyGoogleBridge.php +++ 
b/bridges/KilledbyGoogleBridge.php @@ -12,8 +12,7 @@ class KilledbyGoogleBridge extends BridgeAbstract public function collectData() { - $json = getContents(self::URI . '/graveyard.json') - or returnServerError('Could not request: ' . self::URI . '/graveyard.json'); + $json = getContents(self::URI . '/graveyard.json'); $this->handleJson($json); $this->orderItems(); diff --git a/bridges/LegoIdeasBridge.php b/bridges/LegoIdeasBridge.php index c4361f1fb56..e983e56d5cd 100644 --- a/bridges/LegoIdeasBridge.php +++ b/bridges/LegoIdeasBridge.php @@ -52,8 +52,7 @@ public function collectData() CURLOPT_POST => 1, CURLOPT_POSTFIELDS => $this->getHttpPostData() ]; - $responseData = getContents($this->getHttpPostURI(), $header, $opts) or - returnServerError('Unable to query Lego Ideas API.'); + $responseData = getContents($this->getHttpPostURI(), $header, $opts); foreach (json_decode($responseData)->results as $project) { preg_match('/datetime=\"(\S+)\"/', $project->entity->published_at, $date_matches); diff --git a/bridges/Mailman2Bridge.php b/bridges/Mailman2Bridge.php index ad0d81102ee..6b620c033ea 100644 --- a/bridges/Mailman2Bridge.php +++ b/bridges/Mailman2Bridge.php @@ -3,7 +3,7 @@ class Mailman2Bridge extends BridgeAbstract { const NAME = 'Mailman2Bridge'; - const URI = 'https://list.org/'; + const URI = 'https://list.org'; const MAINTAINER = 'imagoiq'; const CACHE_TIMEOUT = 60 * 30; // 30m const DESCRIPTION = 'Fetch latest messages from Mailman 2 archive (Pipermail)'; @@ -68,7 +68,7 @@ public function collectData() throw new \Exception('Failed to gzdecode'); } } - $mboxParts = preg_split('/^From /', $data); + $mboxParts = preg_split('/^From\s.+\d{2}:\d{2}:\d{2}\s\d{4}$/m', $data); // Drop the first element which is always an empty string array_shift($mboxParts); $mboxMails = array_reverse($mboxParts); diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php index aa32f4baa66..48157921d19 100644 --- a/bridges/NordbayernBridge.php +++ 
b/bridges/NordbayernBridge.php @@ -53,6 +53,19 @@ class NordbayernBridge extends BridgeAbstract ] ]]; + public function collectData() + { + $region = $this->getInput('region'); + if ($region === 'rothenburg-o-d-t') { + $region = 'rothenburg-ob-der-tauber'; + } + $url = self::URI . '/region/' . $region; + $listSite = getSimpleHTMLDOM($url); + + $this->handleNewsblock($listSite); + } + + private function getValidImage($picture) { $img = $picture->find('img', 0); @@ -75,23 +88,25 @@ private function getUseFullContent($rawContent) ) { $content .= $element; } elseif ($element->tag === 'main') { - $content .= self::getUseFullContent($element->find('article', 0)); + $content .= $this->getUseFullContent($element->find('article', 0)); } elseif ($element->tag === 'header') { - $content .= self::getUseFullContent($element); + $content .= $this->getUseFullContent($element); } elseif ( $element->tag === 'div' && !str_contains($element->class, 'article__infobox') && !str_contains($element->class, 'authorinfo') ) { - $content .= self::getUseFullContent($element); + $content .= $this->getUseFullContent($element); } elseif ( $element->tag === 'section' && (str_contains($element->class, 'article__richtext') || str_contains($element->class, 'article__context')) ) { - $content .= self::getUseFullContent($element); + $content .= $this->getUseFullContent($element); } elseif ($element->tag === 'picture') { - $content .= self::getValidImage($element); + $content .= $this->getValidImage($element); + } elseif ($element->tag === 'ul') { + $content .= $element; } } return $content; @@ -144,8 +159,8 @@ private function getArticle($link) // of the title image. If we didn't do this some rss programs // would show the subtitle of the title image as teaser instead // of the actuall article teaser. 
- $item['content'] .= self::getTeaser($content); - $item['content'] .= self::getUseFullContent($content); + $item['content'] .= $this->getTeaser($content); + $item['content'] .= $this->getUseFullContent($content); } @@ -167,7 +182,7 @@ private function handleNewsblock($listSite) continue; } - $item = self::getArticle($url); + $item = $this->getArticle($url); // exclude police reports if desired if ( @@ -188,16 +203,4 @@ private function handleNewsblock($listSite) $this->items[] = $item; } } - - public function collectData() - { - $region = $this->getInput('region'); - if ($region === 'rothenburg-o-d-t') { - $region = 'rothenburg-ob-der-tauber'; - } - $url = self::URI . '/region/' . $region; - $listSite = getSimpleHTMLDOM($url); - - self::handleNewsblock($listSite); - } } diff --git a/bridges/NovayaGazetaEuropeBridge.php b/bridges/NovayaGazetaEuropeBridge.php index ec288f2b64c..89d31a944e0 100644 --- a/bridges/NovayaGazetaEuropeBridge.php +++ b/bridges/NovayaGazetaEuropeBridge.php @@ -41,6 +41,9 @@ public function collectData() $data = json_decode($json); foreach ($data->records as $record) { + if (!isset($record->blocks)) { + continue; + } foreach ($record->blocks as $block) { if (!property_exists($block, 'date')) { continue; diff --git a/bridges/NurembergerNachrichtenBridge.php b/bridges/NurembergerNachrichtenBridge.php new file mode 100644 index 00000000000..10644212525 --- /dev/null +++ b/bridges/NurembergerNachrichtenBridge.php @@ -0,0 +1,178 @@ + [ + 'name' => 'region', + 'type' => 'list', + 'exampleValue' => 'Nürnberg', + 'title' => 'Select a region', + 'values' => [ + 'Ansbach' => 'ansbach', + 'Erlangen' => 'erlangen', + 'Erlangen-Höchstadt' => 'erlangen-hoechstadt', + 'Forchheim' => 'forchheim', + 'Fürth' => 'fuerth', + 'Gunzenhausen' => 'gunzenhausen', + 'Neumarkt' => 'neumarkt', + 'Neustadt/Aisch-Bad Windsheim' => 'neustadt-aisch-bad-windsheim', + 'Nürnberg' => 'nuernberg', + 'Nürnberger Land' => 'nuernberger-land', + 'Pegnitz' => 'pegnitz', + 'Roth' => 
'roth', + 'Schwabach' => 'schwabach', + 'Weißenburg' => 'weissenburg' + ] + ], + 'hideNNPlus' => [ + 'name' => 'Hide NN+ articles', + 'type' => 'checkbox', + 'exampleValue' => 'unchecked', + 'title' => 'Hide all paywall articles on NN' + ], + ]]; + + public function collectData() + { + $region = $this->getInput('region'); + if ( + $region === 'neustadt-aisch-bad-windsheim' || + $region === 'erlangen-hoechstadt' || + $region === '' + ) { + $region = 'region/' . $region; + } + $url = self::URI . '/' . $region; + $listSite = getSimpleHTMLDOM($url); + + $this->handleNewsblock($listSite); + } + + private function handleNewsblock($listSite) + { + $main = $listSite->find('main', 0); + foreach ($main->find('article') as $article) { + $url = $article->find('a', 0)->href; + $url = urljoin(self::URI, $url); + + $articleContent = getSimpleHTMLDOMCached($url, 86400 * 7); + + // exclude nn+ articles if desired + if ( + $this->getInput('hideNNPlus') && + str_contains($articleContent->find('article[id=article]', 0)->find('header', 0), 'icon-nnplus') + ) { + continue; + } + + $item = $this->parseArticle($articleContent, $url); + $articleContent->clear(); + + $this->items[] = $item; + } + } + + private function parseArticle($article, $link) + { + $item = []; + defaultLinkTo($article, self::URI); + + $item['uri'] = $link; + + $author = $article->find('.article__author', 1); + if ($author !== null) { + $item['author'] = trim($author->plaintext); + } + + $createdAt = $article->find('[class=article__release]', 0); + if ($createdAt) { + $item['timestamp'] = strtotime(str_replace('Uhr', '', $createdAt->plaintext)); + } + + if ($article->find('h2', 0) === null) { + $item['title'] = $article->find('h3', 0)->innertext; + } else { + $item['title'] = $article->find('h2', 0)->innertext; + } + $item['content'] = ''; + + if ($article->find('section[class*=article__richtext]', 0) === null) { + $content = $article->find('div[class*=modul__teaser]', 0)->find('p', 0); + $item['content'] .= $content; 
+ } else { + $content = $article->find('article', 0); + // change order of article teaser in order to show it on top + // of the title image. If we didn't do this some rss programs + // would show the subtitle of the title image as teaser instead + // of the actual article teaser. + $item['content'] .= $this->getTeaser($content); + $item['content'] .= $this->getUseFullContent($content); + } + + return $item; + } + + private function getTeaser($content) + { + $teaser = $content->find('p[class=article__teaser]', 0); + if ($teaser === null) { + return ''; + } + $teaser = $teaser->plaintext; + $teaser = preg_replace('/[ ]{2,}/', ' ', $teaser); + $teaser = ''; + return $teaser; + } + + private function getUseFullContent($rawContent) + { + $content = ''; + foreach ($rawContent->children as $element) { + if ( + ($element->tag === 'p' || $element->tag === 'h3') && + $element->class !== 'article__teaser' + ) { + $content .= $element; + } elseif ($element->tag === 'main') { + $content .= $this->getUseFullContent($element->find('article', 0)); + } elseif ($element->tag === 'header') { + $content .= $this->getUseFullContent($element); + } elseif ( + $element->tag === 'div' && + !str_contains($element->class, 'article__infobox') && + !str_contains($element->class, 'authorinfo') + ) { + $content .= $this->getUseFullContent($element); + } elseif ( + $element->tag === 'section' && + (str_contains($element->class, 'article__richtext') || + str_contains($element->class, 'article__context')) + ) { + $content .= $this->getUseFullContent($element); + } elseif ($element->tag === 'picture') { + $content .= $this->getValidImage($element); + } elseif ($element->tag === 'ul') { + $content .= $element; + } + } + return $content; + } + + private function getValidImage($picture) + { + $img = $picture->find('img', 0); + if ($img) { + $imgUrl = $img->src; + if (!preg_match('#/logo-.*\.png#', $imgUrl)) { + return '
    '; + } + } + return ''; + } +} diff --git a/bridges/OpenCVEBridge.php b/bridges/OpenCVEBridge.php index 594bb9ece3c..b5fc852b1cc 100644 --- a/bridges/OpenCVEBridge.php +++ b/bridges/OpenCVEBridge.php @@ -147,10 +147,9 @@ public function collectData() for ($i = 1; $i <= $this->getInput('pages'); $i++) { $queryPaginated = array_merge($query, ['page' => $i]); $url = $instance . '/api/cve?' . http_build_query($queryPaginated); - $response = getContents( - $url, - [$authHeader] - ); + + $response = getContents($url, [$authHeader]); + $titlePrefix = ''; if (count($queries) > 1) { $titlePrefix = '[' . $queryName . '] '; @@ -205,10 +204,8 @@ private function getTitle($titlePrefix, $cveItem) private function fetchContents($cveItem, $titlePrefix, $instance, $authHeader) { $url = $instance . '/api/cve/' . $cveItem->id; - $response = getContents( - $url, - [$authHeader] - ); + + $response = getContents($url, [$authHeader]); $datum = json_decode($response); $title = $this->getTitleFromDatum($datum, $titlePrefix); diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 6e41cf20745..4e9ab0b53f7 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -191,15 +191,12 @@ protected function collectDataTalk() } } - /** - * Extract the cookies obtained from the URL - * @return array the array containing the cookies set by the URL - */ private function getCookiesHeaderValue($url) { $response = getContents($url, [], [], true); - $setCookieHeaders = $response['headers']['set-cookie'] ?? 
[]; + $setCookieHeaders = $response->getHeader('set-cookie', true); $cookies = array_map(fn($c): string => explode(';', $c)[0], $setCookieHeaders); + return implode('; ', $cookies); } diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php index 604b5d4bed3..820b3a7c21d 100644 --- a/bridges/PixivBridge.php +++ b/bridges/PixivBridge.php @@ -332,21 +332,20 @@ private function getData(string $url, bool $cache = true, bool $getJSON = false, } if ($cache) { - $data = $this->loadCacheValue($url); - if (!$data) { - $data = getContents($url, $httpHeaders, $curlOptions, true); - $this->saveCacheValue($url, $data); + $response = $this->loadCacheValue($url); + if (!$response || is_array($response)) { + $response = getContents($url, $httpHeaders, $curlOptions, true); + $this->saveCacheValue($url, $response); } } else { - $data = getContents($url, $httpHeaders, $curlOptions, true); + $response = getContents($url, $httpHeaders, $curlOptions, true); } - $this->checkCookie($data['headers']); + $this->checkCookie($response->getHeaders()); if ($getJSON) { - return json_decode($data['content'], true); - } else { - return $data['content']; + return json_decode($response->getBody(), true); } + return $response->getBody(); } } diff --git a/bridges/RainbowSixSiegeBridge.php b/bridges/RainbowSixSiegeBridge.php index 77495a3cf7a..d725e3e9655 100644 --- a/bridges/RainbowSixSiegeBridge.php +++ b/bridges/RainbowSixSiegeBridge.php @@ -22,7 +22,7 @@ public function collectData() $dlUrl = $dlUrl . '&limit=6&mediaFilter=all&skip=0&startIndex=0&tags=BR-rainbow-six%20GA-siege'; $dlUrl = $dlUrl . '&locale=en-us&fallbackLocale=en-us&environment=master'; $jsonString = getContents($dlUrl, [ - 'Authorization: ' . self::NIMBUS_API_KEY + 'Authorization: ' . 
self::NIMBUS_API_KEY, ]); $json = json_decode($jsonString, true); diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index 7ece0e15b30..ef74fdcdf34 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -10,6 +10,7 @@ class RedditBridge extends BridgeAbstract const MAINTAINER = 'dawidsowa'; const NAME = 'Reddit Bridge'; const URI = 'https://old.reddit.com'; + const CACHE_TIMEOUT = 60 * 60 * 2; // 2h const DESCRIPTION = 'Return hot submissions from Reddit'; const PARAMETERS = [ @@ -107,9 +108,8 @@ public function collectData() // 403 Forbidden // This can possibly mean that reddit has permanently blocked this server's ip address $this->cache->set($forbiddenKey, true, 60 * 61); - } - if ($e->getCode() === 429) { - $this->cache->set($rateLimitKey, true, 60 * 16); + } elseif ($e->getCode() === 429) { + $this->cache->set($rateLimitKey, true, 60 * 61); } throw $e; } @@ -143,10 +143,14 @@ private function collectDataInternal(): void $flareInput = $this->getInput('f'); foreach ($subreddits as $subreddit) { - $version = 'v0.0.1'; + $version = 'v0.0.2'; $useragent = "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"; $url = self::createUrl($search, $flareInput, $subreddit, $user, $section, $this->queriedContext); - $json = getContents($url, ['User-Agent: ' . $useragent]); + + $response = getContents($url, ['User-Agent: ' . $useragent], [], true); + + $json = $response->getBody(); + $parsedJson = Json::decode($json, false); foreach ($parsedJson->data->children as $post) { diff --git a/bridges/ReutersBridge.php b/bridges/ReutersBridge.php index fdf4e2a9358..07b3061cc8c 100644 --- a/bridges/ReutersBridge.php +++ b/bridges/ReutersBridge.php @@ -417,9 +417,11 @@ private function handleArticleContent($contents) $get_embed_url = 'https://publish.twitter.com/oembed?url=' . urlencode($tweet_url) . 
'&partner=&hide_thread=false'; + $oembed_json = json_decode(getContents($get_embed_url), true); $embed .= $oembed_json['html']; - } catch (Exception $e) { // In case not found any tweet. + } catch (\Exception $e) { + // In case not found any tweet. $embed .= ''; } break; diff --git a/bridges/RoadAndTrackBridge.php b/bridges/RoadAndTrackBridge.php index c236036cd65..eb2dcc53226 100644 --- a/bridges/RoadAndTrackBridge.php +++ b/bridges/RoadAndTrackBridge.php @@ -68,9 +68,4 @@ private function fetchArticle($articleLink) $item['content'] = $content; return $item; } - - private function getArticleContent($article) - { - return getContents($article->contentUrl); - } } diff --git a/bridges/RumbleBridge.php b/bridges/RumbleBridge.php index f6bfca7d193..a8841e005bf 100644 --- a/bridges/RumbleBridge.php +++ b/bridges/RumbleBridge.php @@ -2,10 +2,10 @@ class RumbleBridge extends BridgeAbstract { - const NAME = 'rumble.com bridge'; - const URI = 'https://rumble.com'; - const DESCRIPTION = 'Fetches the latest channel/user videos'; - const MAINTAINER = 'dvikan'; + const NAME = 'Rumble.com Bridge'; + const URI = 'https://rumble.com/'; + const DESCRIPTION = 'Fetches the latest channel/user videos and livestreams.'; + const MAINTAINER = 'dvikan, NotsoanoNimus'; const CACHE_TIMEOUT = 60 * 60; // 1h const PARAMETERS = [ [ @@ -13,15 +13,19 @@ class RumbleBridge extends BridgeAbstract 'name' => 'Account', 'type' => 'text', 'required' => true, + 'title' => 'Name of the target account to create into a feed.', 'defaultValue' => 'bjornandreasbullhansen', ], 'type' => [ + 'name' => 'Account Type', 'type' => 'list', - 'name' => 'Type', + 'title' => 'The type of profile to create a feed from.', 'values' => [ - 'Channel' => 'channel', - 'User' => 'user', - ] + 'Channel (All)' => 'channel', + 'Channel Videos' => 'channel-videos', + 'Channel Livestreams' => 'channel-livestream', + 'User (All)' => 'user', + ], ], ] ]; @@ -30,12 +34,28 @@ public function collectData() { $account = 
$this->getInput('account'); $type = $this->getInput('type'); + $url = self::getURI(); - if ($type === 'channel') { - $url = "https://rumble.com/c/$account"; + if (!preg_match('#^[\w\-_.@]+$#', $account) || strlen($account) > 64) { + throw new \Exception('Invalid target account.'); } - if ($type === 'user') { - $url = "https://rumble.com/user/$account"; + + switch ($type) { + case 'user': + $url .= "user/$account"; + break; + case 'channel': + $url .= "c/$account"; + break; + case 'channel-videos': + $url .= "c/$account/videos"; + break; + case 'channel-livestream': + $url .= "c/$account/livestreams"; + break; + default: + // Shouldn't ever happen. + throw new \Exception('Invalid media type.'); } $dom = getSimpleHTMLDOM($url); @@ -57,6 +77,9 @@ public function collectData() public function getName() { - return 'Rumble.com ' . $this->getInput('account'); + if ($this->getInput('account')) { + return 'Rumble.com - ' . $this->getInput('account'); + } + return self::NAME; } } diff --git a/bridges/RutubeBridge.php b/bridges/RutubeBridge.php index 452dbde4def..39577575fd1 100644 --- a/bridges/RutubeBridge.php +++ b/bridges/RutubeBridge.php @@ -24,6 +24,13 @@ class RutubeBridge extends BridgeAbstract 'required' => true ], ], + 'По результатам поиска' => [ + 's' => [ + 'name' => 'Запрос', + 'exampleValue' => 'SUREN', + 'required' => true, + ] + ] ]; protected $title; @@ -34,6 +41,8 @@ public function getURI() return self::URI . '/channel/' . strval($this->getInput('c')) . '/videos/'; } elseif ($this->getInput('p')) { return self::URI . '/plst/' . strval($this->getInput('p')) . '/'; + } elseif ($this->getInput('s')) { + return self::URI . '/search/?suggest=1&query=' . 
strval($this->getInput('s')); } else { return parent::getURI(); } @@ -60,7 +69,7 @@ private function getJSONData($html) return json_decode(str_replace('\x', '\\\x', $matches[1])); } - public function collectData() + private function getVideosFromReduxState() { $link = $this->getURI(); @@ -71,8 +80,30 @@ public function collectData() $videos = $reduxState->userChannel->videos->results; $this->title = $reduxState->userChannel->info->name; } elseif ($this->getInput('p')) { - $videos = $reduxState->playlist->data->results; - $this->title = $reduxState->playlist->title; + $playListVideosMethod = 'getPlaylistVideos(' . $this->getInput('p') . ')'; + $videos = $reduxState->api->queries->$playListVideosMethod->data->results; + $playListMethod = 'getPlaylist(' . $this->getInput('p') . ')'; + $this->title = $reduxState->api->queries->$playListMethod->data->title; + } elseif ($this->getInput('s')) { + $this->title = 'Поиск ' . $this->getInput('s'); + } + + return $videos; + } + + private function getVideosFromSearchAPI() + { + $contents = getContents(self::URI . '/api/search/video/?suggest=1&client=wdp&query=' . 
$this->getInput('s')); + $json = json_decode($contents); + return $json->results; + } + + public function collectData() + { + if ($this->getInput('c') || $this->getInput('p')) { + $videos = $this->getVideosFromReduxState(); + } else { + $videos = $this->getVideosFromSearchAPI(); } foreach ($videos as $video) { diff --git a/bridges/SpotifyBridge.php b/bridges/SpotifyBridge.php index 259480114c3..e03d43a1332 100644 --- a/bridges/SpotifyBridge.php +++ b/bridges/SpotifyBridge.php @@ -286,9 +286,9 @@ private function fetchAccessToken() } else { $basicAuth = base64_encode(sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret'))); $json = getContents('https://accounts.spotify.com/api/token', [ - "Authorization: Basic $basicAuth" + "Authorization: Basic $basicAuth", ], [ - CURLOPT_POSTFIELDS => 'grant_type=client_credentials' + CURLOPT_POSTFIELDS => 'grant_type=client_credentials', ]); $data = Json::decode($json); $this->token = $data['access_token']; diff --git a/bridges/SubstackBridge.php b/bridges/SubstackBridge.php new file mode 100644 index 00000000000..13eea02ebec --- /dev/null +++ b/bridges/SubstackBridge.php @@ -0,0 +1,50 @@ + [ + 'required' => false, + ] + ]; + + const PARAMETERS = [ + '' => [ + 'url' => [ + 'name' => 'Substack RSS URL', + 'required' => true, + 'type' => 'text', + 'defaultValue' => 'https://newsletter.pragmaticengineer.com/feed', + 'title' => 'Usually https:///feed' + ] + ] + ]; + + public function collectData() + { + $headers = []; + if ($this->getOption('sid')) { + $url_parsed = parse_url($this->getInput('url')); + $authority = $url_parsed['host']; + $cookies = [ + 'ab_experiment_sampled=%22false%22', + 'substack.sid=' . $this->getOption('sid'), + 'substack.lli=1', + 'intro_popup_last_hidden_at=' . (new DateTime())->format('Y-m-d\TH:i:s.v\Z') + ]; + $headers = [ + 'Authority: ' . 
$authority, + 'Cache-Control: max-age=0', + 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', + 'Cookie: ' . implode('; ', $cookies) + ]; + } + $this->collectExpandableDatas($this->getInput('url'), -1, $headers); + } +} diff --git a/bridges/SummitsOnTheAirBridge.php b/bridges/SummitsOnTheAirBridge.php index 53bba7abda2..17431214c14 100644 --- a/bridges/SummitsOnTheAirBridge.php +++ b/bridges/SummitsOnTheAirBridge.php @@ -20,8 +20,12 @@ class SummitsOnTheAirBridge extends BridgeAbstract public function collectData() { - $header = ['Content-type:application/json']; - $opts = [CURLOPT_HTTPGET => 1]; + $header = [ + 'Content-type:application/json', + ]; + $opts = [ + CURLOPT_HTTPGET => 1, + ]; $json = getContents($this->getURI() . $this->getInput('c'), $header, $opts); $spots = json_decode($json, true); diff --git a/bridges/TldrTechBridge.php b/bridges/TldrTechBridge.php index d29553479ba..6c96dff7dd3 100644 --- a/bridges/TldrTechBridge.php +++ b/bridges/TldrTechBridge.php @@ -1,12 +1,12 @@ getInput('topic'); $limit = $this->getInput('limit'); - $url = self::URI . $topic . '/archives'; - $html = getSimpleHTMLDOM($url); - $entries_root = $html->find('div.content-center.mt-5', 0); - $added = 0; + + $url = self::URI . 'api/latest/' . $topic; + $response = getContents($url, [], [], true); + $location = $response->getHeader('Location'); + $locationUrl = Url::fromString($location); + + $this->extractItem($locationUrl); + + $archives_url = self::URI . $topic . '/archives'; + $archives_html = getSimpleHTMLDOM($archives_url); + $entries_root = $archives_html->find('div.content-center.mt-5', 0); foreach ($entries_root->children() as $child) { if ($child->tag != 'a') { continue; } - // Convert //2023-01-01 to unix timestamp - $date_items = explode('/', $child->href); - $date = strtotime(end($date_items)); - $item_url = self::URI . ltrim($child->href, '/'); - try { - $this->items[] = [ - 'uri' => self::URI . 
$child->href, - 'title' => $child->plaintext, - 'timestamp' => $date, - 'content' => $this->extractContent($item_url), - ]; - } catch (HttpException $e) { - continue; - } - $added++; - if ($added >= $limit) { + $this->extractItem(Url::fromString(self::URI . $child->href)); + if (count($this->items) >= $limit) { break; } } } + private function extractItem(Url $url) + { + $pathParts = explode('/', $url->getPath()); + $date = strtotime(end($pathParts)); + try { + [$content, $title] = $this->extractContent($url); + + $this->items[] = [ + 'uri' => (string) $url, + 'title' => $title, + 'timestamp' => $date, + 'content' => $content, + ]; + } catch (HttpException $e) { + // archive occasionally returns broken URLs + return; + } + } + private function extractContent($url) { - $html = getSimpleHTMLDOM($url); + $html = getSimpleHTMLDOMCached($url); $content = $html->find('div.content-center.mt-5', 0); if (!$content) { - throw new HttpException('Could not find content', 500); + throw new \Exception('Could not find content'); } $subscribe_form = $content->find('div.mt-5 > div > form', 0); if ($subscribe_form) { @@ -112,7 +124,7 @@ private function extractContent($url) } } } - - return $content->innertext; + $title = $content->find('h2', 0); + return [$content->innertext, $title->plaintext]; } } diff --git a/bridges/TwitterV2Bridge.php b/bridges/TwitterV2Bridge.php index 83bfae29eba..07af8301992 100644 --- a/bridges/TwitterV2Bridge.php +++ b/bridges/TwitterV2Bridge.php @@ -598,7 +598,7 @@ private static function compareTweetDate($tweet1, $tweet2) private function makeApiCall($api, $authHeaders, $params) { $uri = self::API_URI . $api . '?' . 
http_build_query($params); - $result = getContents($uri, $authHeaders, [], false); + $result = getContents($uri, $authHeaders); $data = json_decode($result); return $data; } diff --git a/bridges/UnogsBridge.php b/bridges/UnogsBridge.php index 486bac3d4da..7aff10c6d8c 100644 --- a/bridges/UnogsBridge.php +++ b/bridges/UnogsBridge.php @@ -92,7 +92,7 @@ private function getJSON($url) { $header = [ 'Referer: https://unogs.com/', - 'referrer: http://unogs.com' + 'referrer: http://unogs.com', ]; $raw = getContents($url, $header); diff --git a/bridges/Vk2Bridge.php b/bridges/Vk2Bridge.php index 0bc0879f94c..6fecba84c54 100644 --- a/bridges/Vk2Bridge.php +++ b/bridges/Vk2Bridge.php @@ -168,6 +168,12 @@ protected function generateContentFromPost($post) $ret .= "* {$text}: {$votes} ({$rate}%)
    "; } $ret .= '

    '; + } elseif ($attachment['type'] == 'album') { + $album = $attachment['album']; + $url = "https://vk.com/album{$album['owner_id']}_{$album['id']}"; + $title = 'Альбом: ' . $album['title']; + $photo = $this->getImageURLWithLargestWidth($album['thumb']['sizes']); + $ret .= "

    {$title}
    {$title}

    "; } elseif (!in_array($attachment['type'], ['video', 'audio', 'doc'])) { $ret .= "

    Unknown attachment type: {$attachment['type']}

    "; } diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 980b4154877..22957f26c4a 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -511,11 +511,11 @@ private function getContents() while ($redirects < 2) { $response = getContents($uri, $httpHeaders, [CURLOPT_FOLLOWLOCATION => false], true); - if (in_array($response['code'], [200, 304])) { - return $response['content']; + if (in_array($response->getCode(), [200, 304])) { + return $response->getBody(); } - $headers = $response['headers']; + $headers = $response->getHeaders(); $uri = urljoin(self::URI, $headers['location'][0]); if (str_contains($uri, '/429.html')) { diff --git a/caches/ArrayCache.php b/caches/ArrayCache.php index efce4f3579e..55b18519340 100644 --- a/caches/ArrayCache.php +++ b/caches/ArrayCache.php @@ -2,6 +2,9 @@ declare(strict_types=1); +/** + * Also known as an in-memory/runtime cache + */ class ArrayCache implements CacheInterface { private array $data = []; diff --git a/config.default.ini.php b/config.default.ini.php index 8f7de832120..2014b743120 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -8,23 +8,24 @@ ; Only these bridges are available for feed production ; How to enable all bridges: enabled_bridges[] = * -enabled_bridges[] = CssSelectorBridge -enabled_bridges[] = FeedMerge -enabled_bridges[] = FeedReducerBridge -enabled_bridges[] = Filter -enabled_bridges[] = GettrBridge -enabled_bridges[] = MastodonBridge -enabled_bridges[] = Reddit -enabled_bridges[] = RumbleBridge -enabled_bridges[] = SoundcloudBridge -enabled_bridges[] = Telegram -enabled_bridges[] = ThePirateBay -enabled_bridges[] = TikTokBridge -enabled_bridges[] = Twitch -enabled_bridges[] = Vk -enabled_bridges[] = XPathBridge -enabled_bridges[] = Youtube -enabled_bridges[] = YouTubeCommunityTabBridge +;enabled_bridges[] = CssSelectorBridge +;enabled_bridges[] = FeedMerge +;enabled_bridges[] = FeedReducerBridge +;enabled_bridges[] = Filter +;enabled_bridges[] = GettrBridge 
+;enabled_bridges[] = MastodonBridge +;enabled_bridges[] = Reddit +;enabled_bridges[] = RumbleBridge +;enabled_bridges[] = SoundcloudBridge +;enabled_bridges[] = Telegram +;enabled_bridges[] = ThePirateBay +;enabled_bridges[] = TikTokBridge +;enabled_bridges[] = Twitch +;enabled_bridges[] = Vk +;enabled_bridges[] = XPathBridge +;enabled_bridges[] = Youtube +;enabled_bridges[] = YouTubeCommunityTabBridge +enabled_bridges[] = * ; Defines the timezone used by RSS-Bridge ; Find a list of supported timezones at @@ -45,6 +46,13 @@ ; Whether to enable maintenance mode. If enabled, feed requests receive 503 Service Unavailable enable_maintenance_mode = false +; The default language to use for the application's web UI (locale and region; e.g. 'en-US'). +app_language = en-US + +; Whether to throw an app exception when a translation text item is missing from an i18n +; language file that has 'complete' set to 'true'. +enforce_complete_translations = true + [http] ; Operation timeout in seconds timeout = 15 @@ -58,6 +66,10 @@ ; Max http response size in MB max_filesize = 20 +; The default language to use in requests (locale and region; e.g. 'en-US'). +; Some bridges might manually override this setting to do things like bypass CloudFlare. 
+accept_language = en-US + [cache] ; Cache type: file, sqlite, memcached, array, null diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000000..9f178049a33 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,9 @@ +version: '2' +services: + rss-bridge: + image: rssbridge/rss-bridge:latest + volumes: + - ./config:/config + ports: + - 3000:80 + restart: unless-stopped diff --git a/docs/01_General/03_Requirements.md b/docs/01_General/03_Requirements.md index 1ae5aa2649f..617cfadc4cc 100644 --- a/docs/01_General/03_Requirements.md +++ b/docs/01_General/03_Requirements.md @@ -1,6 +1,4 @@ -**RSS-Bridge** requires either of the following: -## A Web server* with: - PHP 7.4 (or higher) - [`openssl`](https://secure.php.net/manual/en/book.openssl.php) extension @@ -14,11 +12,3 @@ - [`sqlite3`](http://php.net/manual/en/book.sqlite3.php) extension (only when using SQLiteCache) Enable extensions by un-commenting the corresponding line in your PHP configuration (`php.ini`). - - -## A Linux server with: - - - Docker server configured (Any recent version should do) - - 100MB of disk space - -To setup RSS Bridge using Docker, see the [Docker Guide](../03_For_Hosts/03_Docker_Installation.md) on installing RSS Bridge. \ No newline at end of file diff --git a/docs/03_For_Hosts/01_Installation.md b/docs/03_For_Hosts/01_Installation.md index 39df791833e..729e6abbdcb 100644 --- a/docs/03_For_Hosts/01_Installation.md +++ b/docs/03_For_Hosts/01_Installation.md @@ -7,6 +7,4 @@ In order to install RSS-Bridge on your own web server* do as follows: For linux hosts: * Grant read-write-access for `www-data` to the `./cache` directory (`chown -R www-data ./cache`) -You have successfully installed RSS-Bridge. - -Instructions for Docker setups are at [Docker Installation](../03_For_Hosts/03_Docker_Installation.md) \ No newline at end of file +You have successfully installed RSS-Bridge. 
\ No newline at end of file diff --git a/docs/03_For_Hosts/02_Updating.md b/docs/03_For_Hosts/02_Updating.md index 3ec9804913c..3484c6dccb5 100644 --- a/docs/03_For_Hosts/02_Updating.md +++ b/docs/03_For_Hosts/02_Updating.md @@ -8,10 +8,6 @@ Updating an existing installation is very simple, depending on your type of inst This will update all core files to the latest version. Your custom configuration and bridges are left untouched. Keep in mind that changes to any core file of RSS-Bridge will be replaced. -## Docker - -Simply get the latest Docker build via `:latest` or specific builds via `:`. - ## Heroku ### If you didn't fork the repo before diff --git a/docs/03_For_Hosts/03_Docker_Installation.md b/docs/03_For_Hosts/03_Docker_Installation.md deleted file mode 100644 index d895e748bfb..00000000000 --- a/docs/03_For_Hosts/03_Docker_Installation.md +++ /dev/null @@ -1,49 +0,0 @@ -This guide is for people who want to run RSS Bridge using Docker. If you want to run it a simple PHP Webhost environment, see [Installation](../03_For_Hosts/01_Installation.md) instead. - -## Setup - -### Create the container - -```bash -docker create \ ---name=rss-bridge \ ---volume :/config \ ---publish 3000:80 \ -rssbridge/rss-bridge:latest -``` -### Run it -```bash -docker start rss-bridge -``` - -Access it using `http://IP_Address:3000`. If you'd like to run a specific version, you can run it by changing the ':latest' on the image to a tag listed [here](https://hub.docker.com/r/rssbridge/rss-bridge/tags/) - -The server runs on port 80 internally, map any port of your choice (in this example 3000). 
- -You can run it using a `docker-compose.yml` as well: - -```yml -version: '2' -services: - rss-bridge: - image: rssbridge/rss-bridge:latest - volumes: - - :/config - ports: - - 3000:80 - restart: unless-stopped -``` - -# Container access and information - -|Function|Command| -|----|----| -|Shell access (live container)|`docker exec -it rss-bridge /bin/sh`| -|Realtime container logs|`docker logs -f rss-bridge`| - -# Adding custom bridges and configurations -If you want to add a bridge that is not part of [`/bridges`](https://github.com/RSS-Bridge/rss-bridge/tree/master/bridges), you can map a folder to the `/config` folder of the `rss-bridge` container. - -1. Create a folder in the location of your docker-compose.yml or your general docker working area (in this example it will be `/home/docker/rssbridge/config` ). -2. Copy your [custom bridges](../05_Bridge_API/01_How_to_create_a_new_bridge.md) to the `/home/docker/rssbridge/config` folder. Applies also to [config.ini.php](../03_For_Hosts/08_Custom_Configuration.md). -3. Map the folder to `/config` inside the container. To do that, replace the `` from the previous examples with `/home/docker/rssbridge/config` \ No newline at end of file diff --git a/docs/03_For_Hosts/index.md b/docs/03_For_Hosts/index.md index 1529cb37f23..b89f321adc0 100644 --- a/docs/03_For_Hosts/index.md +++ b/docs/03_For_Hosts/index.md @@ -1,11 +1,14 @@ This section is directed at **hosts** and **server administrators**. -To install RSS-Bridge, please follow the [installation instructions](../03_For_Hosts/01_Installation.md). You must have access to a web server with a working PHP environment! +To install RSS-Bridge, please follow the [installation instructions](../03_For_Hosts/01_Installation.md). +You must have access to a web server with a working PHP environment! -RSS-Bridge comes with a large amount of bridges. Only few bridges are enabled by default. 
Unlock more bridges by adding them to the [whitelist](../03_For_Hosts/05_Whitelisting.md). +RSS-Bridge comes with a large number of bridges. -Some bridges could be implemented more efficiently by actually using proprietary APIs, but there are reasons against it: +Some bridges could be implemented more efficiently by actually using proprietary APIs, +but there are reasons against it: -- RSS-Bridge exists in the first place to NOT use APIs. See [the rant](https://github.com/RSS-Bridge/rss-bridge/blob/master/README.md#Rant) +- RSS-Bridge exists in the first place to NOT use APIs. +- See [the rant](https://github.com/RSS-Bridge/rss-bridge/blob/master/README.md#Rant) -- APIs require private keys that could be stored on servers running RSS-Bridge, which is a security concern, involves complex authorizations for inexperienced users and could cause harm (when using paid services for example). In a closed environment (a server only you use for yourself) however you might be interested in using them anyway. So, check [this](https://github.com/RSS-Bridge/rss-bridge/pull/478/files) possible implementation of an anti-captcha solution. \ No newline at end of file +- APIs require private keys that could be stored on servers running RSS-Bridge, which is a security concern, involves complex authorizations for inexperienced users and could cause harm (when using paid services for example). In a closed environment (a server only you use for yourself) however you might be interested in using them anyway. So, check [this](https://github.com/RSS-Bridge/rss-bridge/pull/478/files) possible implementation of an anti-captcha solution. \ No newline at end of file diff --git a/docs/04_For_Developers/05_Debug_mode.md b/docs/04_For_Developers/05_Debug_mode.md index 6bdb1d484a0..7d503acdd42 100644 --- a/docs/04_For_Developers/05_Debug_mode.md +++ b/docs/04_For_Developers/05_Debug_mode.md @@ -1,6 +1,7 @@

    Warning!

    -Enabling debug mode on a public server may result in malicious clients retrieving sensitive data about your server and possibly gaining access to it. Do not enable debug mode on a public server, unless you understand the implications of your doing! +Enabling debug mode on a public server may result in malicious clients retrieving sensitive data about your server and possibly gaining access to it. +Do not enable debug mode on a public server, unless you understand the implications of your doing! *** @@ -20,14 +21,3 @@ _Notice_: * The bridge whitelist still applies! (debug mode does **not** enable all bridges) RSS-Bridge will give you a visual feedback when debug mode is enabled. - -While debug mode is active, RSS-Bridge will write additional data to your servers `error.log`. - -Debug mode is controlled by the static class `Debug`. It provides three core functions: - -* `Debug::isEnabled()`: Returns `true` if debug mode is enabled. -* `Debug::log($message)`: Adds a message to `error.log`. It takes one parameter, which can be anything. - -Example: `Debug::log('Hello World!');` - -**Notice**: `Debug::log($message)` calls `Debug::isEnabled()` internally. You don't have to do that manually. \ No newline at end of file diff --git a/docs/04_For_Developers/07_Development_Environment_Setup.md b/docs/04_For_Developers/07_Development_Environment_Setup.md index 23a4b1011fb..d3a5ee8d1ef 100644 --- a/docs/04_For_Developers/07_Development_Environment_Setup.md +++ b/docs/04_For_Developers/07_Development_Environment_Setup.md @@ -1,39 +1,5 @@ -These are examples of how to setup a local development environment to add bridges, improve the docs, etc. 
-## Docker - -The following can serve as an example for using docker: - -``` -# create a new directory -mkdir rss-bridge-contribution -cd rss-bridge-contribution - -# clone the project into a subfolder -git clone https://github.com/RSS-Bridge/rss-bridge -``` - -Then add a `docker-compose.yml` file: - -```yml -version: '3' - -services: - rss-bridge: - build: - context: ./rss-bridge - ports: - - 3000:80 - volumes: - - ./config:/config - - ./rss-bridge/bridges:/app/bridges -``` - -You can then access RSS-Bridge at `localhost:3000` and [add your bridge](../05_Bridge_API/How_to_create_a_new_bridge) to the `rss-bridge/bridges` folder. - -If you need to edit any other files, like from the `lib` folder add this to the `volumes` section: `./rss-bridge/lib:/app/lib`. - -### Docs with Docker +## Docs with Docker If you want to edit the docs add this to your docker-compose.yml: diff --git a/docs/10_Bridge_Specific/Economist.md b/docs/10_Bridge_Specific/Economist.md new file mode 100644 index 00000000000..d11ad43c3cf --- /dev/null +++ b/docs/10_Bridge_Specific/Economist.md @@ -0,0 +1,21 @@ +# EconomistWorldInBriefBridge and EconomistBridge + +In May 2024, The Economist finally fixed its paywall, and it started requiring authorization, which means you can't use this bridge unless you have an active subscription. + +If you do, the way to use the bridge is to copy your session cookie: +1. Log in to The Economist +2. Open DevTools (Chrome DevTools or Firefox Developer Tools) +3. Go to https://www.economist.com/the-world-in-brief +4. In DevTools, go to the "Network" tab, there select the first request (`the-world-in-brief`) and copy the value of the `Cookie:` header from "Request Headers". + +The cookie is valid for three months.
+ +Once you've done this, add the cookie to your `config.ini.php`: + +``` +[EconomistWorldInBriefBridge] +cookie = "" + +[EconomistBridge] +cookie = "" +``` diff --git a/docs/10_Bridge_Specific/Substack.md b/docs/10_Bridge_Specific/Substack.md new file mode 100644 index 00000000000..7595bbefaab --- /dev/null +++ b/docs/10_Bridge_Specific/Substack.md @@ -0,0 +1,18 @@ +# SubstackBridge + +[Substack](https://substack.com) provides RSS feeds at `/feed` path, e.g., https://newsletter.pragmaticengineer.com/feed/. However, these feeds have two problems, addressed by this bridge: +- They use RSS 2.0 with the draft [content extension](https://web.resource.org/rss/1.0/modules/content/), which isn't supported by some readers; +- They don't have the full content for paywalled posts. + +Retrieving the full content is only possible _with an active subscription to the blog_. If you have one, Substack will return the full feed if it's fetched with the right set of cookies. Figuring out whether it's the intended behaviour is left as an exercise for the reader. + +To obtain the session cookie, authorize at https://substack.com/, open DevTools, go to Application -> Cookies -> https://substack.com, copy the value of `substack.sid` and paste it to the RSS bridge config: + +``` +[SubstackBridge] +sid = "" +``` + +Authorization sometimes requires CAPTCHA, hence this operation is manual. The cookie lives for three months. + +After you've done this, the bridge should return full feeds for your subscriptions. diff --git a/i18n/en-US.php b/i18n/en-US.php new file mode 100644 index 00000000000..09807a2df1f --- /dev/null +++ b/i18n/en-US.php @@ -0,0 +1,198 @@ + true, /* is this translation complete? 
*/ + 'ltr' => true, /* left-to-right */ + 'frontpage' => [ /* content on the main page */ + 'findfeed_button' => 'Find Feed from URL', + 'searchfield_placeholder' => 'Insert URL or bridge name', + 'active_bridges' => 'active bridges', + ], + 'bridge_default' => [ /* defaults for the BridgeAbstract */ + 'name' => 'Unnamed bridge', + 'description' => 'No description provided.', + 'no_maintainer' => 'No maintainer', + ], + 'bridge_card' => [ /* for the class by the same name */ + 'proxy_disable' => 'Disable proxy', + 'cache_timeout' => 'Cache timeout in seconds', + 'example_right_click' => 'Example (right click to use)', + 'generate_feed' => 'generate feed', + ], + 'bridge_error' => [ /* the bridge-error template */ + 'find_similar_bugs' => 'Find similar bugs', + 'find_similar_bugs_title' => 'Opens GitHub to search for similar issues', + 'create_github_issue' => 'Create a GitHub issue', + 'create_github_issue_title' => 'After clicking this button you can review the issue before submitting it', + ], + 'connectivity' => [ /* the connectivity template */ + 'search_for_bridge' => 'Search for bridge...', + ], + 'default_inputs' => [ + 'limit' => [ + 'name' => 'limit', + 'title' => 'Maximum number of items to return', + ], + ], + 'errors' => [ /* error messages */ + 'general' => [ + 'missing_config_option' => 'Missing configuration option: %s', + 'invalid_context' => 'Invalid parameters value(s).', + 'invalid_context_args' => 'Invalid parameters value(s): %s', + 'missing_context' => 'Required parameter(s) missing.', + 'mixed_context' => 'Mixed context parameters.', + 'no_bridges_enabled' => 'No bridges are enabled...', + 'whitelist' => 'This bridge is not whitelisted.', + 'format' => 'You must specify a format.', + 'not_found' => 'Bridge not found.', + 'not_found_named' => 'Bridge "%s" was not found.', + 'not_found_for_url' => 'No bridge was found for the given URL', + 'missing_parameter' => 'Missing bridge parameter.', + 'specify_url' => 'You must specify a URL.', + 
'specify_format' => 'You must specify a format.', + ], + 'url' => [ + 'illegal' => 'Illegal URL: "%s"', + 'parse' => 'Failed to parse URL: "%s"', + 'scheme' => 'Invalid scheme: "%s"', + 'path_slash' => 'Path must start with forward slash: "%s"', + ], + 'cache' => [ + 'no_type' => 'No cache type is configured.', + 'bad_name' => 'Invalid cache name: "%s"', + 'bad_classname' => 'Invalid cache classname: "%s"', + 'filecache_path_not_found' => 'The FileCache path at "%s" does not exist.', + 'filecache_not_writable' => 'The FileCache path at "%s" is not writable.', + 'not_loaded' => 'The "%s" extension is not loaded. Please check "php.ini".', + 'path_not_writeable' => 'The cache folder is not writable.', + 'config_missing' => 'The configuration for %s is missing.', + 'config_invalid' => 'The configuration for %s is invalid.', + 'param_not_set' => '"%s" parameter is not set for %s.', + 'param_invalid' => '"%s" parameter is invalid for %s.', + 'missing_file' => 'Unable to find the cache file.', + ], + 'expander' => [ + 'no_url' => 'There is no URL for this RSS expander.', + 'bad_xml_url' => 'Unable to parse XML from URL "%s" because the response was empty.', + 'bad_xml_url_msg' => 'Failed to parse XML from URL "%s": %s', + ], + 'parser' => [ + 'bad_xml_msg' => 'Unable to parse XML: %s', + 'feed_format' => 'Unable to detect feed format.', + ], + 'format' => [ + 'invalid_name' => 'Invalid format name: "%s"', + ], + 'actions' => [ + 'connectivity' => [ + 'debug_required' => 'This action is only available in debug mode.', + ], + 'display' => [ + 'cached' => 'This is a cached response.', + 'error' => 'Bridge returned error', + ], + 'findfeed' => [ + 'no_name_var' => 'Variable "%s" (No name provided)', + ], + ], + 'http_exceptions' => [ + 'e400' => [ + 'banner' => '400 Bad Request', + 'reason' => 'This is usually caused by an incorrectly constructed HTTP request.', + ], + 'e403' => [ + 'banner' => '403 Forbidden', + 'reason' => 'RSS-Bridge tried to fetch a page with a valid 
request but was refused by the server.', + ], + 'e404' => [ + 'banner' => '404 Page Not Found', + 'reason' => 'RSS-Bridge tried to fetch a page on a website but it was not found or returned by the server.', + ], + 'e429' => [ + 'banner' => '429 Too Many Requests', + 'reason' => 'RSS-Bridge tried to fetch a website and was directed to try again later.', + ], + 'e503' => [ + 'banner' => '503 Service Unavailable', + 'reason' => 'This is commonly caused when a server is down for maintenance or is overloaded.', + ], + ], + 'cloudflare' => [ + 'protected' => 'The website is protected by CloudFlare.', + 'reason' => 'RSS-Bridge tried to fetch a website, but it was blocked by CloudFlare. CloudFlare is anti-bot software whose purpose is to block non-human entities.', + ], + 'curl' => [ + 'see_libcurl_errors_doc' => 'See the cURL documentation for information about the error code:', + 'e10' => [ + 'banner' => 'The RSS feed is completely empty', + 'reason' => 'RSS-Bridge tried to parse the empty string as XML. Unfortunately, the fetched URL is not pointing to any real XML.', + ], + 'e11' => [ + 'banner' => 'Something is wrong with the RSS feed', + 'reason' => 'RSS-Bridge tried to parse XML and failed. 
The XML received is probably invalid.', + ], + ], + 'dom_empty' => 'Unable to parse the DOM because the HTTP response was empty.', + ], + 'misc' => [ /* miscellaneous words/text */ + 'show_more' => 'show more', + 'show_less' => 'show less', + 'active' => 'active', + 'inactive' => 'inactive', + 'search' => 'search', + 'details' => 'details', + 'attachments' => 'attachments', + 'categories' => 'categories', + 'trace' => 'trace', + 'context' => 'context', + 'version' => 'version', + 'query' => 'query', + 'os' => 'operating system', + 'email' => 'email', + 'token' => 'token', + 'telegram' => 'Telegram', + 'unknown' => 'unknown', + 'go_back' => 'go back', + 'back_to_frontpage' => 'go back to front page', + 'type' => 'type', + 'code' => 'code', + 'message' => 'message', + 'file' => 'file', + 'line' => 'line', + 'donate' => 'donate', + 'donate_maintainer' => 'Donate to Maintainer', + 'author_by' => 'by', + 'token_required' => 'Authentication with token required.', + 'all_is_good' => 'all is good', + ], +]; diff --git a/index.php b/index.php index 7144ae27528..687cd2dfdd5 100644 --- a/index.php +++ b/index.php @@ -2,42 +2,53 @@ if (version_compare(\PHP_VERSION, '7.4.0') === -1) { http_response_code(500); - print 'RSS-Bridge requires minimum PHP version 7.4'; - exit; + exit("RSS-Bridge requires minimum PHP version 7.4\n"); } -if (! is_readable(__DIR__ . '/lib/bootstrap.php')) { - http_response_code(500); - print 'Unable to read lib/bootstrap.php. Check file permissions.'; - exit; +require_once __DIR__ . '/lib/bootstrap.php'; + +$config = []; +if (file_exists(__DIR__ . '/config.ini.php')) { + $config = parse_ini_file(__DIR__ . '/config.ini.php', true, INI_SCANNER_TYPED); + if (!$config) { + http_response_code(500); + exit("Error parsing config.ini.php\n"); + } } +Configuration::loadConfiguration($config, getenv()); -require_once __DIR__ . 
'/lib/bootstrap.php'; +I18n::load(); + +$logger = new SimpleLogger('rssbridge'); -set_exception_handler(function (\Throwable $e) { +set_exception_handler(function (\Throwable $e) use ($logger) { $response = new Response(render(__DIR__ . '/templates/exception.html.php', ['e' => $e]), 500); $response->send(); - RssBridge::getLogger()->error('Uncaught Exception', ['e' => $e]); + $logger->error('Uncaught Exception', ['e' => $e]); }); -set_error_handler(function ($code, $message, $file, $line) { +set_error_handler(function ($code, $message, $file, $line) use ($logger) { + // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); if ((error_reporting() & $code) === 0) { // Deprecation messages and other masked errors are typically ignored here return false; } - // In the future, uncomment this: - //throw new \ErrorException($message, 0, $code, $file, $line); + if (Debug::isEnabled()) { + // This might be annoying, but it's for the greater good + throw new \ErrorException($message, 0, $code, $file, $line); + } $text = sprintf( '%s at %s line %s', sanitize_root($message), sanitize_root($file), $line ); - RssBridge::getLogger()->warning($text); + $logger->warning($text); + // todo: return false to prevent default error handler from running? }); // There might be some fatal errors which are not caught by set_error_handler() or \Throwable. -register_shutdown_function(function () { +register_shutdown_function(function () use ($logger) { $error = error_get_last(); if ($error) { $message = sprintf( @@ -47,33 +58,30 @@ sanitize_root($error['file']), $error['line'] ); - RssBridge::getLogger()->error($message); - if (Debug::isEnabled()) { - // This output can interfere with json output etc - // This output is written at the bottom - print sprintf("
    %s
    \n", e($message)); - } + $logger->error($message); } }); -$errors = Configuration::checkInstallation(); -if ($errors) { - http_response_code(500); - print '
    ' . implode("\n", $errors) . '
    '; - exit; -} +$cacheFactory = new CacheFactory($logger); -// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); +if (Debug::isEnabled()) { + $logger->addHandler(new ErrorLogHandler(Logger::DEBUG)); + $cache = $cacheFactory->create('array'); +} else { + $logger->addHandler(new ErrorLogHandler(Logger::INFO)); + $cache = $cacheFactory->create(); +} +$httpClient = new CurlHttpClient(); date_default_timezone_set(Configuration::getConfig('system', 'timezone')); try { - $rssBridge = new RssBridge(); + $rssBridge = new RssBridge($logger, $cache, $httpClient); $response = $rssBridge->main($argv ?? []); $response->send(); } catch (\Throwable $e) { // Probably an exception inside an action - RssBridge::getLogger()->error('Exception in RssBridge::main()', ['e' => $e]); - http_response_code(500); - print render(__DIR__ . '/templates/exception.html.php', ['e' => $e]); + $logger->error('Exception in RssBridge::main()', ['e' => $e]); + $response = new Response(render(__DIR__ . '/templates/exception.html.php', ['e' => $e]), 500); + $response->send(); } diff --git a/lib/ActionInterface.php b/lib/ActionInterface.php index c0ddcf9f44b..d2e1c709491 100644 --- a/lib/ActionInterface.php +++ b/lib/ActionInterface.php @@ -2,8 +2,5 @@ interface ActionInterface { - /** - * @return string|Response - */ - public function execute(Request $request); + public function __invoke(Request $request): Response; } diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index 2467dec60e1..00327d9a2df 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -2,15 +2,22 @@ abstract class BridgeAbstract { - const NAME = 'Unnamed bridge'; + /* + * i18n note: because this abstract class uses CONST, translations here cannot be + * dynamic without fundamentally changing this structure. Instead, the text of each + * translated item is prepended with a 'magic' value from the i18n.php library. 
+ * This causes the library to treat the text as a selector dynamically when calling + * the 'I18n::dynamic_select' method. + */ + const NAME = I18n::SELECTOR_MAGIC . 'bridge_default:name'; const URI = ''; const DONATION_URI = ''; - const DESCRIPTION = 'No description provided'; + const DESCRIPTION = I18n::SELECTOR_MAGIC . 'bridge_default:description'; /** * Preferably a github username */ - const MAINTAINER = 'No maintainer'; + const MAINTAINER = I18n::SELECTOR_MAGIC . 'bridge_default:no_maintainer'; /** * Cache TTL in seconds @@ -26,9 +33,9 @@ abstract class BridgeAbstract * Can be inlined and modified if necessary. */ protected const LIMIT = [ - 'name' => 'Limit', + 'name' => I18n::SELECTOR_MAGIC . 'default_inputs:limit:name', 'type' => 'number', - 'title' => 'Maximum number of items to return', + 'title' => I18n::SELECTOR_MAGIC . 'default_inputs:limit:title', ]; protected array $items = []; @@ -61,7 +68,7 @@ public function getFeed(): array public function getName() { - return static::NAME; + return I18n::dynamic_select(static::NAME); } public function getURI() @@ -93,12 +100,12 @@ public function getOption(string $name) */ public function getDescription() { - return static::DESCRIPTION; + return I18n::dynamic_select(static::DESCRIPTION); } public function getMaintainer(): string { - return static::MAINTAINER; + return I18n::dynamic_select(static::MAINTAINER); } /** @@ -131,7 +138,7 @@ public function loadConfiguration() } if (isset($optionValue['required']) && $optionValue['required'] === true) { - throw new \Exception(sprintf('Missing configuration option: %s', $optionName)); + throw new \Exception(xlat('errors:general:missing_config_option', $optionName)); } elseif (isset($optionValue['defaultValue'])) { $this->configuration[$optionName] = $optionValue['defaultValue']; } @@ -152,7 +159,7 @@ public function setInput(array $input) if (!$contexts) { if ($input) { - throw new \Exception('Invalid parameters value(s)'); + throw new 
\Exception(xlat('errors:general:invalid_context')); } return; } @@ -163,7 +170,12 @@ public function setInput(array $input) $errors = $validator->validateInput($input, $contexts); if ($errors !== []) { $invalidParameterKeys = array_column($errors, 'name'); - throw new \Exception(sprintf('Invalid parameters value(s): %s', implode(', ', $invalidParameterKeys))); + throw new \Exception( + xlat( + 'errors:general:invalid_context_args', + implode(', ', $invalidParameterKeys) + ) + ); } // Guess the context from input data @@ -173,9 +185,9 @@ public function setInput(array $input) } if (is_null($this->queriedContext)) { - throw new \Exception('Required parameter(s) missing'); + throw new \Exception(xlat('errors:general:missing_context')); } elseif ($this->queriedContext === false) { - throw new \Exception('Mixed context parameters'); + throw new \Exception(xlat('errors:general:mixed_context')); } $this->setInputWithContext($input, $this->queriedContext); diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index d15ac865e0a..61dc7fdcaeb 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -16,20 +16,22 @@ public static function render(string $bridgeClassName, Request $request): string if (Configuration::getConfig('proxy', 'url') && Configuration::getConfig('proxy', 'by_bridge')) { $contexts['global']['_noproxy'] = [ - 'name' => 'Disable proxy (' . (Configuration::getConfig('proxy', 'name') ?: Configuration::getConfig('proxy', 'url')) . ')', + 'name' => xlat('bridge_card:proxy_disable') . ' (' + . (Configuration::getConfig('proxy', 'name') ?: Configuration::getConfig('proxy', 'url')) . 
')', 'type' => 'checkbox' ]; } if (Configuration::getConfig('cache', 'custom_timeout')) { $contexts['global']['_cache_timeout'] = [ - 'name' => 'Cache timeout in seconds', + 'name' => xlat('bridge_card:cache_timeout'), 'type' => 'number', 'defaultValue' => $bridge->getCacheTimeout() ]; } $shortName = $bridge->getShortName(); + $showMore = ucfirst(xlat('misc:show_more')) ?: 'Show more'; $card = <<{$description}

    - + CARD; @@ -76,13 +78,15 @@ class="bridge-card" } } - $card .= sprintf('', $bridgeClassName); + $showLess = ucfirst(xlat('misc:show_less')) ?: 'Show less'; + $card .= sprintf('', $bridgeClassName, $showLess); if (Configuration::getConfig('admin', 'donations') && $bridge->getDonationURI()) { $card .= sprintf( - '

    %s ~ Donate

    ', + '

    %s ~ %s

    ', $bridge->getMaintainer(), - $bridge->getDonationURI() + $bridge->getDonationURI(), + xlat('misc:donate') ?: 'Donate' ); } else { $card .= sprintf('

    %s

    ', $bridge->getMaintainer()); @@ -126,7 +130,10 @@ private static function renderForm( $idArg = 'arg-' . urlencode($bridgeClassName) . '-' . urlencode($contextName) . '-' . urlencode($id); - $inputName = filter_var($inputEntry['name'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + // If the name is an array, apply i18n. Else, process it 'normally'. + $inputEntryName = I18n::dynamic_select($inputEntry['name']); + $inputName = filter_var($inputEntryName, FILTER_SANITIZE_FULL_SPECIAL_CHARS); + $form .= '' . PHP_EOL; if ( @@ -151,7 +158,8 @@ private static function renderForm( $infoText[] = filter_var($inputEntry['title'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); } if ($inputEntry['exampleValue'] !== '') { - $infoText[] = "Example (right click to use):\n" . filter_var($inputEntry['exampleValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + $infoText[] = xlat('bridge_card:example_right_click') . ":\n" + . filter_var($inputEntry['exampleValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); $infoTextScript = 'rssbridge_use_placeholder_value(this);'; } @@ -165,7 +173,8 @@ private static function renderForm( $form .= ''; } - $form .= ''; + $form .= ''; return $form . '' . 
PHP_EOL; } diff --git a/lib/BridgeFactory.php b/lib/BridgeFactory.php index ad4332875de..b5ccfc4d468 100644 --- a/lib/BridgeFactory.php +++ b/lib/BridgeFactory.php @@ -22,7 +22,7 @@ public function __construct() $enabledBridges = Configuration::getConfig('system', 'enabled_bridges'); if ($enabledBridges === null) { - throw new \Exception('No bridges are enabled...'); + throw new \Exception(xlat('errors:general:no_bridges_enabled')); } foreach ($enabledBridges as $enabledBridge) { if ($enabledBridge === '*') { @@ -34,7 +34,7 @@ public function __construct() $this->enabledBridges[] = $bridgeClassName; } else { $this->missingEnabledBridges[] = $enabledBridge; - $this->logger->info(sprintf('Bridge not found: %s', $enabledBridge)); + $this->logger->info(xlat('errors:general:not_found_named', $enabledBridge)); } } } diff --git a/lib/CacheFactory.php b/lib/CacheFactory.php index 90aa21ba7be..9dced2d4112 100644 --- a/lib/CacheFactory.php +++ b/lib/CacheFactory.php @@ -16,7 +16,7 @@ public function create(string $name = null): CacheInterface { $name ??= Configuration::getConfig('cache', 'type'); if (!$name) { - throw new \Exception('No cache type configured'); + throw new \Exception(xlat('errors:cache:no_type')); } $cacheNames = []; foreach (scandir(PATH_LIB_CACHES) as $file) { @@ -35,12 +35,12 @@ public function create(string $name = null): CacheInterface $index = array_search(strtolower($name), array_map('strtolower', $cacheNames)); if ($index === false) { - throw new \InvalidArgumentException(sprintf('Invalid cache name: "%s"', $name)); + throw new \InvalidArgumentException(xlat('errors:cache:bad_name', $name)); } $className = $cacheNames[$index] . 
'Cache'; if (!preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $className)) { - throw new \InvalidArgumentException(sprintf('Invalid cache classname: "%s"', $className)); + throw new \InvalidArgumentException(xlat('errors:cache:bad_classname', $className)); } switch ($className) { @@ -53,27 +53,27 @@ public function create(string $name = null): CacheInterface 'enable_purge' => Configuration::getConfig('FileCache', 'enable_purge'), ]; if (!is_dir($fileCacheConfig['path'])) { - throw new \Exception(sprintf('The FileCache path does not exists: %s', $fileCacheConfig['path'])); + throw new \Exception(xlat('errors:cache:filecache_path_not_found', $fileCacheConfig['path'])); } if (!is_writable($fileCacheConfig['path'])) { - throw new \Exception(sprintf('The FileCache path is not writable: %s', $fileCacheConfig['path'])); + throw new \Exception(xlat('errors:cache:filecache_not_writable', $fileCacheConfig['path'])); } return new FileCache($this->logger, $fileCacheConfig); case SQLiteCache::class: if (!extension_loaded('sqlite3')) { - throw new \Exception('"sqlite3" extension not loaded. Please check "php.ini"'); + throw new \Exception(xlat('errors:cache:not_loaded', 'sqlite')); } if (!is_writable(PATH_CACHE)) { - throw new \Exception('The cache folder is not writable'); + throw new \Exception(xlat('errors:cache:path_not_writable')); } $file = Configuration::getConfig('SQLiteCache', 'file'); if (!$file) { - throw new \Exception(sprintf('Configuration for %s missing.', 'SQLiteCache')); + throw new \Exception(xlat('errors:cache:config_missing', 'SQLiteCache')); } if (dirname($file) == '.') { $file = PATH_CACHE . 
$file; } elseif (!is_dir(dirname($file))) { - throw new \Exception(sprintf('Invalid configuration for %s', 'SQLiteCache')); + throw new \Exception(xlat('errors:cache:config_invalid', 'SQLiteCache')); } return new SQLiteCache($this->logger, [ 'file' => $file, @@ -82,31 +82,31 @@ public function create(string $name = null): CacheInterface ]); case MemcachedCache::class: if (!extension_loaded('memcached')) { - throw new \Exception('"memcached" extension not loaded. Please check "php.ini"'); + throw new \Exception(xlat('errors:cache:not_loaded', 'memcached')); } $section = 'MemcachedCache'; $host = Configuration::getConfig($section, 'host'); $port = Configuration::getConfig($section, 'port'); if (empty($host) && empty($port)) { - throw new \Exception('Configuration for ' . $section . ' missing.'); + throw new \Exception(xlat('errors:cache:config_missing', $section)); } if (empty($host)) { - throw new \Exception('"host" param is not set for ' . $section); + throw new \Exception(xlat('errors:cache:param_not_set', 'host', $section)); } if (empty($port)) { - throw new \Exception('"port" param is not set for ' . $section); + throw new \Exception(xlat('errors:cache:param_not_set', 'port', $section)); } if (!ctype_digit($port)) { - throw new \Exception('"port" param is invalid for ' . $section); + throw new \Exception(xlat('errors:cache:param_invalid', 'port', $section)); } $port = intval($port); if ($port < 1 || $port > 65535) { - throw new \Exception('"port" param is invalid for ' . $section); + throw new \Exception(xlat('errors:cache:param_invalid', 'port', $section)); } return new MemcachedCache($this->logger, $host, $port); default: if (!file_exists(PATH_LIB_CACHES . $className . 
'.php')) { - throw new \Exception('Unable to find the cache file'); + throw new \Exception(xlat('errors:cache:missing_file')); } return new $className(); } diff --git a/lib/Configuration.php b/lib/Configuration.php index 63f67a3c027..b104a251ec6 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -198,6 +198,9 @@ public static function loadConfiguration(array $customConfig = [], array $env = public static function getConfig(string $section, string $key, $default = null) { + if (self::$config === []) { + throw new \Exception('Config has not been loaded'); + } return self::$config[strtolower($section)][strtolower($key)] ?? $default; } diff --git a/lib/Debug.php b/lib/Debug.php index 4333b3a54c6..ba9e787e18d 100644 --- a/lib/Debug.php +++ b/lib/Debug.php @@ -16,6 +16,9 @@ public static function isEnabled(): bool return false; } + /** + * @deprecated Use $this->logger->debug() + */ public static function log($message) { $e = new \Exception(); diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index fe809bc259b..760d1534f60 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -7,20 +7,20 @@ abstract class FeedExpander extends BridgeAbstract { private array $feed; - public function collectExpandableDatas(string $url, $maxItems = -1) + public function collectExpandableDatas(string $url, $maxItems = -1, $headers = []) { if (!$url) { - throw new \Exception('There is no $url for this RSS expander'); + throw new \Exception(xlat('errors:expander:no_url')); } $maxItems = (int) $maxItems; if ($maxItems === -1) { $maxItems = 999; } $accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*']; - $httpHeaders = ['Accept: ' . implode(', ', $accept)]; + $httpHeaders = array_merge(['Accept: ' . 
implode(', ', $accept)], $headers); $xmlString = getContents($url, $httpHeaders); if ($xmlString === '') { - throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10); + throw new \Exception(xlat('errors:expander:bad_xml_url', $url), 10); } // prepare/massage the xml to make it more acceptable $problematicStrings = [ @@ -35,7 +35,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1) $this->feed = $feedParser->parseFeed($xmlString); } catch (\Exception $e) { // FeedMergeBridge relies on this string - throw new \Exception(sprintf('Failed to parse xml from %s: %s', $url, create_sane_exception_message($e))); + throw new \Exception(xlat('errors:expander:bad_xml_url_msg', $url, create_sane_exception_message($e))); } $items = array_slice($this->feed['items'], 0, $maxItems); diff --git a/lib/FeedItem.php b/lib/FeedItem.php index fc4549a7b5f..8a092a27e76 100644 --- a/lib/FeedItem.php +++ b/lib/FeedItem.php @@ -136,7 +136,7 @@ public function setTitle($title) { $this->title = null; if (!is_string($title)) { - Debug::log('Title must be a string!'); + trigger_error('Title must be a string: ' . print_r($title, true)); } else { $this->title = truncate(trim($title)); } @@ -186,21 +186,23 @@ public function getContent(): ?string } /** - * @param string|object $content The item content as text or simple_html_dom object. 
+ * @param string|array|\simple_html_dom|\simple_html_dom_node $content The item content */ public function setContent($content) { $this->content = null; + if ( $content instanceof simple_html_dom || $content instanceof simple_html_dom_node ) { $content = (string) $content; } + if (is_string($content)) { $this->content = $content; } else { - Debug::log(sprintf('Feed content must be a string but got %s', gettype($content))); + Debug::log(sprintf('Unable to convert feed content to string: %s', gettype($content))); } } diff --git a/lib/FeedParser.php b/lib/FeedParser.php index b774cc14e23..c8fe43573a7 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -23,7 +23,7 @@ public function parseFeed(string $xmlString): array if ($xmlErrors) { $firstXmlErrorMessage = $xmlErrors[0]->message; } - throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? '')); + throw new \Exception(xlat('errors:parser:bad_xml_msg', $firstXmlErrorMessage ?? '')); } $feed = [ 'title' => null, @@ -79,7 +79,7 @@ public function parseFeed(string $xmlString): array $feed['items'][] = $this->parseAtomItem($item); } } else { - throw new \Exception('Unable to detect feed format'); + throw new \Exception(xlat('errors:parser:feed_format')); } return $feed; @@ -167,6 +167,12 @@ public function parseRss2Item(\SimpleXMLElement $feedItem): array if (isset($namespaces['media'])) { $media = $feedItem->children($namespaces['media']); } + + if (isset($namespaces['content'])) { + $content = $feedItem->children($namespaces['content']); + $item['content'] = (string) $content; + } + foreach ($namespaces as $namespaceName => $namespaceUrl) { if (in_array($namespaceName, ['', 'content', 'media'])) { continue; diff --git a/lib/FormatFactory.php b/lib/FormatFactory.php index e9cbe597770..09ae7ccfd4b 100644 --- a/lib/FormatFactory.php +++ b/lib/FormatFactory.php @@ -18,11 +18,11 @@ public function __construct() public function create(string $name): FormatAbstract { if (! 
preg_match('/^[a-zA-Z0-9-]*$/', $name)) { - throw new \InvalidArgumentException('Format name invalid!'); + throw new \InvalidArgumentException(xlat('errors:format:invalid_name', $name)); } $sanitizedName = $this->sanitizeName($name); if (!$sanitizedName) { - throw new \InvalidArgumentException(sprintf('Unknown format given `%s`', $name)); + throw new \InvalidArgumentException(xlat('errors:format:invalid_name', $name)); } $className = '\\' . $sanitizedName . 'Format'; return new $className(); diff --git a/lib/RssBridge.php b/lib/RssBridge.php index 1bb5f5ea452..e80e6f0a553 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -2,25 +2,18 @@ final class RssBridge { - private static CacheInterface $cache; private static Logger $logger; + private static CacheInterface $cache; private static HttpClient $httpClient; - public function __construct() - { - self::$logger = new SimpleLogger('rssbridge'); - if (Debug::isEnabled()) { - self::$logger->addHandler(new StreamHandler(Logger::DEBUG)); - } else { - self::$logger->addHandler(new StreamHandler(Logger::INFO)); - } - self::$httpClient = new CurlHttpClient(); - $cacheFactory = new CacheFactory(self::$logger); - if (Debug::isEnabled()) { - self::$cache = $cacheFactory->create('array'); - } else { - self::$cache = $cacheFactory->create(); - } + public function __construct( + Logger $logger, + CacheInterface $cache, + HttpClient $httpClient + ) { + self::$logger = $logger; + self::$cache = $cache; + self::$httpClient = $httpClient; } public function main(array $argv = []): Response @@ -100,22 +93,19 @@ public function main(array $argv = []): Response $className = '\\' . 
$actionName; $actionObject = new $className(); - $response = $actionObject->execute($request); + $response = $actionObject($request); - if (is_string($response)) { - $response = new Response($response); - } return $response; } - public static function getCache(): CacheInterface + public static function getLogger(): Logger { - return self::$cache; + return self::$logger; } - public static function getLogger(): Logger + public static function getCache(): CacheInterface { - return self::$logger; + return self::$cache; } public static function getHttpClient(): HttpClient diff --git a/lib/bootstrap.php b/lib/bootstrap.php index bfc7be39704..067f261b014 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -14,6 +14,7 @@ // Files $files = [ __DIR__ . '/../lib/html.php', + __DIR__ . '/../lib/i18n.php', __DIR__ . '/../lib/contents.php', __DIR__ . '/../lib/php8backports.php', __DIR__ . '/../lib/utils.php', @@ -45,9 +46,3 @@ } } }); - -$customConfig = []; -if (file_exists(__DIR__ . '/../config.ini.php')) { - $customConfig = parse_ini_file(__DIR__ . '/../config.ini.php', true, INI_SCANNER_TYPED); -} -Configuration::loadConfiguration($customConfig, getenv()); diff --git a/lib/contents.php b/lib/contents.php index ba6dd531a10..36b90f3f6a6 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -5,8 +5,8 @@ * * @param array $httpHeaders E.g. ['Content-type: text/plain'] * @param array $curlOptions Associative array e.g. 
[CURLOPT_MAXREDIRS => 3] - * @param bool $returnFull Whether to return an array: ['code' => int, 'headers' => array, 'content' => string] - * @return string|array + * @param bool $returnFull Whether to return Response object + * @return string|Response */ function getContents( string $url, @@ -113,13 +113,7 @@ function getContents( throw $e; } if ($returnFull === true) { - // todo: return the actual response object - return [ - 'code' => $response->getCode(), - 'headers' => $response->getHeaders(), - // For legacy reasons, use 'content' instead of 'body' - 'content' => $response->getBody(), - ]; + return $response; } return $response->getBody(); } @@ -148,7 +142,6 @@ function getContents( * when returning plaintext. * @param string $defaultSpanText Specifies the replacement text for `` * tags when returning plaintext. - * @return false|simple_html_dom Contents as simplehtmldom object. */ function getSimpleHTMLDOM( $url, @@ -160,11 +153,12 @@ function getSimpleHTMLDOM( $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT -) { +): \simple_html_dom { $html = getContents($url, $header ?? [], $opts ?? []); if ($html === '') { - throw new \Exception('Unable to parse dom because the http response was the empty string'); + throw new \Exception(xlat('errors:dom_empty')); } + return str_get_html( $html, $lowercase, diff --git a/lib/i18n.php b/lib/i18n.php new file mode 100644 index 00000000000..b1363b4a944 --- /dev/null +++ b/lib/i18n.php @@ -0,0 +1,157 @@ + $k) { + if (str_starts_with($k, $languageLocale) && is_string($input[$k])) { + return $input[$k]; + } + } + + return ''; + } + + /* + * translate : Primary translation function for i18n. + * + * Input selector values are paths into the table separated by ':' characters. + */ + public static function translate(string $selector, mixed ...$vars): ?string + { + $path = explode(':', $selector, 5); + $result = null; + + foreach ($path as $idx => $leaf) { + $result = ($idx === 0 ? 
self::$LEXICON : $result)[$leaf]; + } + + if ( + self::$LEXICON['complete'] === true + && !$result + && Configuration::getConfig('system', 'enforce_complete_translations') + ) { + // Missing translations when the language is marked as complete will throw (in English). + throw new \Exception( + sprintf( + 'Missing translation item within a supposedly "completed" language (%s): "%s"', + Configuration::getConfig('system', 'app_language', 'en-US'), + $selector + ) + ); + } + + if (count($vars)) { + $resultFormatted = sprintf($result, ...$vars); + } else { + $resultFormatted = $result; + } + + return $resultFormatted; + } +} + + +/* + * xlat : Global function to abbreviate calls to the I18n::translate method. + */ +function xlat(string $selector, mixed ...$vars): ?string +{ + return I18n::translate($selector, ...$vars); +} diff --git a/lib/logger.php b/lib/logger.php index e579915dc31..3ebe3b0afb9 100644 --- a/lib/logger.php +++ b/lib/logger.php @@ -68,6 +68,16 @@ public function error(string $message, array $context = []): void private function log(int $level, string $message, array $context = []): void { + $ignoredMessages = [ + 'Format name invalid', + 'Unknown format given', + 'Unable to find channel', + ]; + foreach ($ignoredMessages as $ignoredMessage) { + if (str_starts_with($message, $ignoredMessage)) { + return; + } + } foreach ($this->handlers as $handler) { $handler([ 'name' => $this->name, @@ -83,10 +93,12 @@ private function log(int $level, string $message, array $context = []): void final class StreamHandler { + private string $stream; private int $level; - public function __construct(int $level = Logger::DEBUG) + public function __construct(string $stream, int $level = Logger::DEBUG) { + $this->stream = $stream; $this->level = $level; } @@ -106,29 +118,54 @@ public function __invoke(array $record) $record['context']['line'] = $e->getLine(); $record['context']['url'] = get_current_url(); $record['context']['trace'] = 
trace_to_call_points(trace_from_exception($e)); - - $ignoredExceptions = [ - 'You must specify a format', - 'Format name invalid', - 'Unknown format given', - 'Bridge name invalid', - 'Invalid action', - 'twitter: No results for this query', - // telegram - 'Unable to find channel. The channel is non-existing or non-public', - // fb - 'This group is not public! RSS-Bridge only supports public groups!', - 'You must be logged in to view this page', - 'Unable to get the page id. You should consider getting the ID by hand', - // tiktok 404 - 'https://www.tiktok.com/@', - ]; - foreach ($ignoredExceptions as $ignoredException) { - if (str_starts_with($e->getMessage(), $ignoredException)) { - return; - } + } + $context = ''; + if ($record['context']) { + try { + $context = Json::encode($record['context']); + } catch (\JsonException $e) { + $record['context']['message'] = null; + $context = Json::encode($record['context']); } } + $text = sprintf( + "[%s] %s.%s %s %s\n", + $record['created_at']->format('Y-m-d H:i:s'), + $record['name'], + $record['level_name'], + $record['message'], + $context + ); + $bytes = file_put_contents($this->stream, $text, FILE_APPEND | LOCK_EX); + } +} + +final class ErrorLogHandler +{ + private int $level; + + public function __construct(int $level = Logger::DEBUG) + { + $this->level = $level; + } + + public function __invoke(array $record) + { + if ($record['level'] < $this->level) { + return; + } + if (isset($record['context']['e'])) { + /** @var \Throwable $e */ + $e = $record['context']['e']; + unset($record['context']['e']); + $record['context']['type'] = get_class($e); + $record['context']['code'] = $e->getCode(); + $record['context']['message'] = sanitize_root($e->getMessage()); + $record['context']['file'] = sanitize_root($e->getFile()); + $record['context']['line'] = $e->getLine(); + $record['context']['url'] = get_current_url(); + $record['context']['trace'] = trace_to_call_points(trace_from_exception($e)); + } $context = ''; if 
($record['context']) { try { @@ -143,17 +180,10 @@ public function __invoke(array $record) $record['created_at']->format('Y-m-d H:i:s'), $record['name'], $record['level_name'], - // Should probably sanitize message for output context $record['message'], $context ); error_log($text); - if ($record['level'] < Logger::ERROR && Debug::isEnabled()) { - // The record level is INFO or WARNING here - // Not a good idea to print here because http headers might not have been sent - print sprintf("
    %s
    \n", e($text)); - } - //$bytes = file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND | LOCK_EX); } } diff --git a/lib/simplehtmldom/simple_html_dom.php b/lib/simplehtmldom/simple_html_dom.php index 3fc95760133..170f6fb0960 100644 --- a/lib/simplehtmldom/simple_html_dom.php +++ b/lib/simplehtmldom/simple_html_dom.php @@ -118,11 +118,6 @@ function str_get_html( throw new \Exception('Refusing to parse too big input'); } - if (empty($str) || strlen($str) > MAX_FILE_SIZE) { - $dom->clear(); - return false; - } - return $dom->load($str, $lowercase, $stripRN); } diff --git a/lib/url.php b/lib/url.php index 993fef96c10..2be1984d511 100644 --- a/lib/url.php +++ b/lib/url.php @@ -26,12 +26,12 @@ private function __construct() public static function fromString(string $url): self { if (!self::validate($url)) { - throw new UrlException(sprintf('Illegal url: "%s"', $url)); + throw new UrlException(xlat('errors:url:illegal', $url)); } $parts = parse_url($url); if ($parts === false) { - throw new UrlException(sprintf('Failed to parse_url(): %s', $url)); + throw new UrlException(xlat('errors:url:parse', $url)); } return (new self()) @@ -85,7 +85,7 @@ public function getQueryString(): string public function withScheme(string $scheme): self { if (!in_array($scheme, ['http', 'https'])) { - throw new UrlException(sprintf('Invalid scheme %s', $scheme)); + throw new UrlException(xlat('errors:url:scheme', $scheme)); } $clone = clone $this; $clone->scheme = $scheme; @@ -109,7 +109,7 @@ public function withPort(int $port) public function withPath(string $path): self { if (!str_starts_with($path, '/')) { - throw new UrlException(sprintf('Path must start with forward slash: %s', $path)); + throw new UrlException(xlat('errors:url:path_slash', $path)); } $clone = clone $this; $clone->path = $path; diff --git a/templates/bridge-error.html.php b/templates/bridge-error.html.php index 8ece80be538..025f9394382 100644 --- a/templates/bridge-error.html.php +++ 
b/templates/bridge-error.html.php @@ -1,12 +1,12 @@ - - + + - - + +

    diff --git a/templates/connectivity.html.php b/templates/connectivity.html.php index c00e8177eab..48c41042bfd 100644 --- a/templates/connectivity.html.php +++ b/templates/connectivity.html.php @@ -23,7 +23,7 @@ - + \ No newline at end of file diff --git a/templates/exception.html.php b/templates/exception.html.php index 62ac90b4217..2a2133bbbb7 100644 --- a/templates/exception.html.php +++ b/templates/exception.html.php @@ -5,114 +5,79 @@ ?>

    - - -

    The website is protected by CloudFlare

    -

    - RSS-Bridge tried to fetch a website. - The fetching was blocked by CloudFlare. - CloudFlare is anti-bot software. - Its purpose is to block non-humans. -

    - - - getCode() === 400): ?> -

    400 Bad Request

    -

    - This is usually caused by an incorrectly constructed http request. -

    - - - getCode() === 403): ?> -

    403 Forbidden

    -

    - The HTTP 403 Forbidden response status code indicates that the - server understands the request but refuses to authorize it. -

    - - - getCode() === 404): ?> -

    404 Page Not Found

    -

    - RSS-Bridge tried to fetch a page on a website. - But it doesn't exists. -

    - - - getCode() === 429): ?> -

    429 Too Many Requests

    -

    - RSS-Bridge tried to fetch a website. - They told us to try again later. -

    - - - getCode() === 503): ?> -

    503 Service Unavailable

    -

    - Common causes are a server that is down for maintenance - or that is overloaded. -

    - - - getCode() === 0): ?> -

    - See - - https://curl.haxx.se/libcurl/c/libcurl-errors.html - - for description of the curl error code. -

    - -

    - - https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/getCode()) ?> - -

    - - - - getCode() === 10): ?> -

    The rss feed is completely empty

    -

    - RSS-Bridge tried parse the empty string as xml. - The fetched url is not pointing to real xml. -

    - - - getCode() === 11): ?> -

    There is something wrong with the rss feed

    -

    - RSS-Bridge tried parse xml. It failed. The xml is probably broken. -

    - + +

    +

    -

    Details

    + getCode()) { + // Generally speaking, we can safely assume any HttpException with + // the codes 10 or 11 are not valid (since it is always 3 digits). + case 10: + case 11: + if ($e instanceof HttpException) { + print '

    ???

    '; + break; + } + $e_banner = xlat('errors:curl:e' . $e->getCode() . ':banner'); + $e_reason = xlat('errors:curl:e' . $e->getCode() . ':reason'); + print "

    {$e_banner}

    {$e_reason}

    "; + break; + case 400: + case 403: + case 404: + case 429: + case 503: + $e_banner = xlat('errors:http_exceptions:e' . $e->getCode() . ':banner'); + $e_reason = xlat('errors:http_exceptions:e' . $e->getCode() . ':reason'); + print "

    {$e_banner}

    {$e_reason}

    "; + break; + case 0: + $e_msg = xlat('errors:curl:see_libcurl_errors_doc'); + echo << + https://curl.haxx.se/libcurl/c/libcurl-errors.html + +CONTENT; + break; + default: + $e_rawCode = raw($e->getCode()); + echo << + https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/{$e_rawCode} +

    +CONTENT; + break; + } + ?> + +

    - Type: + :
    - Code: getCode()) ?> + : getCode()) ?>
    - Message: getMessage())) ?> + : getMessage())) ?>
    -
    - File: getFile())) ?> +
    + : getFile())) ?>
    - Line: getLine()) ?> + : getLine()) ?>
    -

    Trace

    +

    $frame) : ?> @@ -123,26 +88,26 @@
    -

    Context

    +

    - Query: + :
    - Version: + :
    - OS: + :
    - PHP: + PHP:

    - Go back +
    diff --git a/templates/frontpage.html.php b/templates/frontpage.html.php index c118267307b..4ce6a9ce574 100644 --- a/templates/frontpage.html.php +++ b/templates/frontpage.html.php @@ -6,12 +6,12 @@