diff --git a/Classes/Service/CleanHtmlService.php b/Classes/Service/CleanHtmlService.php index 9722fa0..f61a6f6 100644 --- a/Classes/Service/CleanHtmlService.php +++ b/Classes/Service/CleanHtmlService.php @@ -104,7 +104,7 @@ public function clean(string $html, array $config = []): string } // convert line-breaks to UNIX - $this->convNlOs($html); + $html = preg_replace('/\r\n|\r/', $this->newline, $html); $manipulations = []; @@ -116,16 +116,17 @@ public function clean(string $html, array $config = []): string $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class); } - if (!empty($this->headerComment)) { - $this->includeHeaderComment($html); - } - foreach ($manipulations as $key => $manipulation) { /** @var ManipulationInterface $manipulation */ $configuration = isset($config[$key . '.']) && \is_array($config[$key . '.']) ? $config[$key . '.'] : []; $html = $manipulation->manipulate($html, $configuration); } + // include configured header comment in HTML content block + if (!empty($this->headerComment)) { + $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html, 1); + } + // cleanup HTML5 self-closing elements if (!isset($GLOBALS['TSFE']->config['config']['doctype']) || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) { @@ -136,13 +137,11 @@ public function clean(string $html, array $config = []): string ); } - if ($this->formatType > 0) { - $html = $this->formatHtml($html); + if ($this->formatType) { + $indenter = new \Gajus\Dindent\Indenter(['indentation_character' => $this->tab]); + $html = $indenter->indent($html); } - // remove white space after line ending - $this->rTrimLines($html); - // recover line-breaks if (Environment::isWindows()) { $html = str_replace($this->newline, "\r\n", $html); @@ -150,262 +149,4 @@ public function clean(string $html, array $config = []): string return (string) $html; } - - /** - * Formats the (X)HTML code: - * - taps according to the hirarchy of the tags - * - removes empty spaces between tags - * - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..) - * choose from five options: - * 0 => off - * 1 => no line break at all (code in one line) - * 2 => minimalistic line breaks (structure defining box-elements) - * 3 => aesthetic line breaks (important box-elements) - * 4 => logic line breaks (all box-elements) - * 5 => max line breaks (all elements). - */ - protected function formatHtml(string $html): string - { - // Save original formated pre, textarea, comments, styles and scripts & replace them with markers - preg_match_all( - '/(?s)(()|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', - $html, - $matches - ); - $noFormat = $matches[0]; // do not format these block elements - for ($i = 0; $i < \count($noFormat); ++$i) { - $html = str_replace($noFormat[$i], "\n", $html); - } - - // define box elements for formatting - $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section'; - $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; - $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; - $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; - $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; - $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; - $structureBoxLikeElements = '(?>html|head|body|div|!--)'; - - // split html into it's elements - $htmlArrayTemp = preg_split( - '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', - $html, - -1, - \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY - ); - - if (false === $htmlArrayTemp) { - // Restore saved comments, styles and scripts - for ($i = 0; $i < \count($noFormat); ++$i) { - $html = str_replace("", $noFormat[$i], $html); - } - - return $html; - } - // remove empty lines - $htmlArray = ['']; - $index = 1; - for ($x = 0; $x < \count($htmlArrayTemp); ++$x) { - $text = trim($htmlArrayTemp[$x]); - $htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar; - ++$index; - } - - // rebuild html - $html = ''; - $tabs = 0; - for ($x = 0; $x < \count($htmlArray); ++$x) { - $htmlArrayBefore = $htmlArray[$x - 1] ?? ''; - $htmlArrayCurrent = $htmlArray[$x] ?? ''; - - // check if the element should stand in a new line - $newline = false; - if ('formatType && ( // minimalistic line break - // this element has a line break before itself - preg_match( - '/<' . $structureBoxLikeElements . '(.*)>/Usi', - $htmlArrayCurrent - ) || preg_match( - '/<' . $structureBoxLikeElements . '(.*) \/>/Usi', - $htmlArrayCurrent - ) // one element before is a element that has a line break after - || preg_match( - '/<\/' . $structureBoxLikeElements . '(.*)>/Usi', - $htmlArrayBefore - ) || '", $noFormat[$i], $html); - } - - // include debug comment at the end - if (0 != $tabs && true === $this->debugComment) { - $html .= ""; - } - - return $html; - } - - /** - * Remove ALL line breaks and multiple white space. - */ - protected function killLineBreaks(string $html): string - { - $html = str_replace($this->newline, '', $html); - - return preg_replace('/\s\s+/u', ' ', $html); - // ? return preg_replace('/\n|\s+(\s)/u', '$1', $html); - } - - /** - * Remove multiple white space, keeps line breaks. - */ - protected function killWhiteSpace(string $html): string - { - $temp = explode($this->newline, $html); - for ($i = 0; $i < \count($temp); ++$i) { - if (!trim($temp[$i])) { - unset($temp[$i]); - continue; - } - - $temp[$i] = trim($temp[$i]); - $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]); - } - - return implode($this->newline, $temp); - } - - /** - * Remove white space at the end of lines, keeps other white space and line breaks. - */ - protected function rTrimLines(string &$html): void - { - $html = preg_replace('/\s+$/m', '', $html); - } - - /** - * Convert newlines according to the current OS. - */ - protected function convNlOs(string &$html): void - { - $html = preg_replace("(\r\n|\r)", $this->newline, $html); - } - - /** - * Remove empty lines. - */ - protected function removeEmptyLines(string &$html): void - { - $temp = explode($this->newline, $html); - $result = []; - for ($i = 0; $i < \count($temp); ++$i) { - if ('' == trim($temp[$i])) { - continue; - } - $result[] = $temp[$i]; - } - $html = implode($this->newline, $result); - } - - /** - * Include configured header comment in HTML content block. - */ - public function includeHeaderComment(string &$html): void - { - $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html); - } } diff --git a/README.md b/README.md index 48b9bd8..49b386d 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,3 @@ composer install jweiland/replacer |:----------------------------------|:------------|:-----------------------------------------------------------------|:-------------------| | svgstore.enabled | boolean | Is the SVG extract & merge enabled for this template | 1 | | svgstore.fileSize | integer | Maximum file size of a SVG to include (in `[byte]`) | 50000 | - ---- -##### ToDo: -- Try external packages like https://github.com/ArjanSchouten/HtmlMinifier diff --git a/composer.json b/composer.json index e80e5fe..7339399 100644 --- a/composer.json +++ b/composer.json @@ -6,18 +6,14 @@ "license": "GPL-2.0-or-later", "require": { "php": "^8.1", - "typo3/cms-core": "^12.4||^13.4" + "typo3/cms-core": "^12.4||^13.4", + "schleuse/dindent": "dev-feature/new-major" }, "autoload": { "psr-4": { "HTML\\Sourceopt\\": "Classes/" } }, - "replace": { - "maxserv/replacecontent": "*", - "typo3-ter/replacecontent": "*", - "typo3-ter/sourceopt": "self.version" - }, "require-dev": { "typo3/testing-framework": "^8.2", "friendsofphp/php-cs-fixer": "^3.3", @@ -29,6 +25,14 @@ "TYPO3\\CMS\\Core\\Tests\\": ".Build/vendor/typo3/cms/typo3/sysext/core/Tests/" } }, + "replace": { + "maxserv/replacecontent": "*", + "typo3-ter/replacecontent": "*", + "typo3-ter/sourceopt": "self.version" + }, + "suggest": { + "lochmueller/staticfilecache": "A very flexible and very, very, very fast cache to TYPO3" + }, "config": { "vendor-dir": ".Build/vendor", "bin-dir": ".Build/bin",