Skip to content

Commit a92e7e4

Browse files
committed
[WIP][FEATURE] Add readability calculation to DeepL Overlay
1 parent f9874d0 commit a92e7e4

16 files changed

Lines changed: 471 additions & 5 deletions
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Controller;
6+
7+
use Psr\Http\Message\ResponseInterface;
8+
use Psr\Http\Message\ServerRequestInterface;
9+
use TYPO3\CMS\Core\Http\JsonResponse;
10+
use WebVision\DeeplWrite\Readability\ReadabilityCalculatorFactory;
11+
12+
/**
 * HTTP endpoint that computes readability metrics for a submitted text.
 *
 * The calculator is selected through the injected factory by the "language"
 * value of the parsed request body.
 */
final class ReadabilityController
{
    public function __construct(private readonly ReadabilityCalculatorFactory $factory)
    {
    }

    /**
     * Calculates the readability of the "text" body parameter for the given "language".
     *
     * HTML tags are stripped from the text before it is analysed. Invalid input
     * (missing/non-string language, non-string text, unsupported language or a
     * text without words) is answered with a 400 JSON response carrying an
     * "error" message instead of letting a TypeError/InvalidArgumentException
     * bubble up.
     *
     * @param ServerRequestInterface $request request whose parsed body holds "language" and optionally "text"
     * @return ResponseInterface JSON response with the serialized readability result or an error
     */
    public function calculate(ServerRequestInterface $request): ResponseInterface
    {
        // getParsedBody() may return null or an object; only an array carries our fields.
        $data = $request->getParsedBody();
        $language = is_array($data) ? ($data['language'] ?? null) : null;
        $text = is_array($data) ? ($data['text'] ?? '') : '';

        if (!is_string($language) || $language === '') {
            return new JsonResponse(['error' => 'The parameter "language" is required.'], 400);
        }
        if (!is_string($text)) {
            return new JsonResponse(['error' => 'The parameter "text" must be a string.'], 400);
        }

        try {
            $readabilityCalculator = $this->factory->fromLanguage($language);
            $readabilityResult = $readabilityCalculator->calculateReadability(strip_tags($text));
        } catch (\InvalidArgumentException $exception) {
            // Raised for unsupported languages and for texts without any words.
            return new JsonResponse(['error' => $exception->getMessage()], 400);
        }

        return new JsonResponse($readabilityResult->jsonSerialize());
    }
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Calculator;
6+
7+
use Org\Heigl\Hyphenator\Hyphenator;
8+
use WebVision\DeeplWrite\Readability\ReadabilityCalculatorInterface;
9+
10+
/**
 * Base class providing the text statistics (sentences, words, syllables,
 * characters) that concrete readability calculators build their score on.
 */
abstract class AbstractReadabilityCalculator implements ReadabilityCalculatorInterface
{
    /** Language key of the calculator; concrete calculators override this. */
    protected const LANGUAGE = 'not-supported';

    /** A sentence boundary is "!", "." or "?" followed by a single space. */
    protected const SENTENCE_SPLIT = '/([!\.\?] )/';

    /**
     * Boundaries between syllables in hyphenated text. Everything inside the
     * character class is a literal: "(", whitespace, ")", "+", "!", ".", "?"
     * and the hyphen marker "|".
     */
    protected const HYPHENATED_SPLIT = '/([(\s)+!\.\?|])/';

    /**
     * Counts the sentences of the text.
     *
     * Surrounding whitespace is trimmed and empty pieces are dropped, so a
     * trailing ". " does not add a phantom sentence.
     *
     * @return int number of non-empty sentences, 0 if splitting fails
     */
    final protected function countSentences(string $text): int
    {
        $sentences = preg_split(self::SENTENCE_SPLIT, trim($text), -1, PREG_SPLIT_NO_EMPTY);

        return $sentences === false ? 0 : count($sentences);
    }

    /**
     * Counts the words of the text.
     *
     * A word starts with a Unicode letter and may continue with letters,
     * combining marks, apostrophes and hyphens. This keeps words containing
     * non-ASCII letters (e.g. "Müller") in one piece, which the byte-based
     * str_word_count() does not.
     *
     * @return int number of words
     */
    protected function countWords(string $text): int
    {
        $count = preg_match_all('/\p{L}[\p{L}\p{M}\'-]*/u', $text);
        if ($count === false) {
            // The "u" modifier rejects invalid UTF-8; fall back to the byte-based count.
            return str_word_count($text);
        }

        return $count;
    }

    /**
     * Counts the syllables of the text by hyphenating it with "|" as marker
     * and splitting the result on HYPHENATED_SPLIT.
     *
     * Empty segments (e.g. between "." and the following space, or after a
     * trailing punctuation mark) are not counted as syllables.
     *
     * NOTE(review): the Hyphenator is created without an explicit language, so
     * it presumably uses its default dictionary regardless of static::LANGUAGE —
     * confirm against the hyphenator configuration.
     *
     * @return int number of non-empty segments, 0 if splitting fails
     */
    final protected function countSyllables(string $text): int
    {
        $hyphenator = new Hyphenator();
        $hyphenator->getOptions()->setHyphen('|');
        $result = $hyphenator->hyphenate($text);
        $segments = preg_split(self::HYPHENATED_SPLIT, $result, -1, PREG_SPLIT_NO_EMPTY);

        return $segments === false ? 0 : count($segments);
    }

    /**
     * Counts the characters of the text (multibyte aware).
     */
    protected function countCharacters(string $text): int
    {
        return mb_strlen($text);
    }

    /**
     * Returns the LANGUAGE constant of the concrete calculator (late static binding).
     */
    public function getLanguage(): string
    {
        return static::LANGUAGE;
    }
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Calculator;
6+
7+
use Symfony\Component\DependencyInjection\Attribute\AsTaggedItem;
8+
use WebVision\DeeplWrite\Readability\Result\ReadabilityResult;
9+
10+
/**
 * Flesch Reading Ease calculator registered for the language key "en-us".
 *
 * The score is computed as:
 *
 * FRE = 206.835 - (1.015 * ASL) - (84.6 * ASW)
 *
 * ASL (average sentence length)     = (number of words) / (number of sentences)
 * ASW (average syllables per word)  = (number of syllables) / (number of words)
 *
 * Higher values mean easier text. The result is capped at 100; no lower
 * bound is applied.
 *
 * For an overview of the scoring levels,
 * @see https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
 */
#[AsTaggedItem('deepl.readability')]
final class FleschKincaidEnglish extends AbstractReadabilityCalculator
{
    protected const LANGUAGE = 'en-us';

    /**
     * Gathers the text statistics and the reading ease score for the given text.
     *
     * @throws \InvalidArgumentException if the text contains no words
     */
    public function calculateReadability(string $text): ReadabilityResult
    {
        $sentenceCount = $this->countSentences($text);
        $wordCount = $this->countWords($text);
        $syllableCount = $this->countSyllables($text);
        $characterCount = $this->countCharacters($text);
        $score = $this->calculateScore($wordCount, $sentenceCount, $syllableCount);

        return new ReadabilityResult(
            $text,
            $sentenceCount,
            $wordCount,
            $syllableCount,
            $characterCount,
            $score
        );
    }

    /**
     * Applies the Flesch Reading Ease formula, treating a non-positive
     * sentence count as one sentence.
     *
     * @throws \InvalidArgumentException if $words is zero or negative
     */
    private function calculateScore(
        int $words,
        int $sentences,
        int $syllables
    ): float {
        if ($words <= 0) {
            throw new \InvalidArgumentException(
                'The number of words can not be negative or zero!',
                1757680362
            );
        }
        $sentences = $sentences > 0 ? $sentences : 1;

        $sentenceTerm = 1.015 * ($words / $sentences);
        $syllableTerm = 84.6 * $syllables / $words;
        $score = 206.835 - $sentenceTerm - $syllableTerm;

        // Very short, very simple texts can exceed 100 with this formula;
        // 100 is treated as the maximum ("very easy to read").
        return min($score, 100.0);
    }
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Calculator;
6+
7+
use Symfony\Component\DependencyInjection\Attribute\AsTaggedItem;
8+
use WebVision\DeeplWrite\Readability\Result\ReadabilityResult;
9+
10+
/**
 * Flesch Reading Ease calculator registered for the language key "de".
 *
 * The score is computed with the German variant of the formula:
 *
 * FRE = 180 - ASL - (58.5 * ASW)
 *
 * ASL (average sentence length)     = (number of words) / (number of sentences)
 * ASW (average syllables per word)  = (number of syllables) / (number of words)
 *
 * Higher values mean easier text. The result is capped at 100; no lower
 * bound is applied.
 *
 * For an overview of the scoring levels,
 * @see https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
 *
 * For the German calculation,
 * @see https://de.wikipedia.org/wiki/Lesbarkeitsindex#F%C3%BCr_Deutsch
 */
#[AsTaggedItem('deepl.readability')]
final class FleschKincaidGerman extends AbstractReadabilityCalculator
{
    protected const LANGUAGE = 'de';

    /**
     * Gathers the text statistics and the reading ease score for the given text.
     *
     * @throws \InvalidArgumentException if the text contains no words
     */
    public function calculateReadability(string $text): ReadabilityResult
    {
        $sentences = $this->countSentences($text);
        $words = $this->countWords($text);
        $syllables = $this->countSyllables($text);
        $characters = $this->countCharacters($text);

        return new ReadabilityResult(
            $text,
            $sentences,
            $words,
            $syllables,
            $characters,
            $this->calculateScore($words, $sentences, $syllables)
        );
    }

    /**
     * Applies the German Flesch Reading Ease formula, treating a non-positive
     * sentence count as one sentence.
     *
     * @throws \InvalidArgumentException if $words is zero or negative
     */
    private function calculateScore(
        int $words,
        int $sentences,
        int $syllables
    ): float {
        if ($sentences <= 0) {
            $sentences = 1;
        }
        if ($words <= 0) {
            throw new \InvalidArgumentException(
                'The number of words can not be negative or zero!',
                1757679534
            );
        }

        // Very short, very simple texts can exceed 100 with this formula.
        // Cap the result at 100, the documented maximum, exactly as the
        // English calculator does.
        $score = 180 - ($words / $sentences) - (58.5 * $syllables / $words);

        return min($score, 100.0);
    }
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
/**
 * Resolves the readability calculator for a language through the injected registry.
 */
final class ReadabilityCalculatorFactory
{
    public function __construct(private readonly ReadabilityCalculatorRegistryInterface $registry)
    {
    }

    /**
     * Returns whatever calculator the registry finds for the given language key.
     *
     * @param string $language language key handed to the registry unchanged
     */
    public function fromLanguage(string $language): ReadabilityCalculatorInterface
    {
        $calculator = $this->registry->findByLanguage($language);

        return $calculator;
    }
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
use WebVision\DeeplWrite\Readability\Result\ReadabilityResult;
8+
9+
/**
 * Contract for a readability calculator bound to one language key.
 */
interface ReadabilityCalculatorInterface
{
    /**
     * Returns the language key this calculator is registered for.
     */
    public function getLanguage(): string;

    /**
     * Analyses the given text and returns its readability result.
     */
    public function calculateReadability(string $text): ReadabilityResult;
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
/**
 * Holds the available readability calculators and looks them up by language key.
 */
final class ReadabilityCalculatorRegistry implements ReadabilityCalculatorRegistryInterface
{
    /**
     * Initialised to an empty list so lookups work even when no calculator
     * was injected (an uninitialised typed property would raise an Error).
     *
     * @var list<ReadabilityCalculatorInterface>
     */
    private array $services = [];

    /**
     * @param iterable<ReadabilityCalculatorInterface> $calculators calculators to register, kept in iteration order
     */
    public function __construct(iterable $calculators)
    {
        foreach ($calculators as $calculator) {
            $this->services[] = $calculator;
        }
    }

    /**
     * Returns the first registered calculator whose language key is strictly
     * equal to the given one.
     *
     * @throws \InvalidArgumentException if no calculator matches the language
     */
    public function findByLanguage(string $language): ReadabilityCalculatorInterface
    {
        foreach ($this->services as $service) {
            if ($service->getLanguage() === $language) {
                return $service;
            }
        }

        throw new \InvalidArgumentException(
            sprintf('No service found for language "%s"', $language),
            1757686580
        );
    }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
/**
 * Contract for looking up a readability calculator by language key.
 */
interface ReadabilityCalculatorRegistryInterface
{
    /**
     * Returns the calculator responsible for the given language key.
     */
    public function findByLanguage(string $language): ReadabilityCalculatorInterface;
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Result;
6+
7+
/**
 * Represents the result of a readability analysis performed on a given text.
 * It provides metrics such as sentence, word, syllable, and character counts,
 * as well as a calculated readability score and averages per sentence or word.
 */
final class ReadabilityResult implements \JsonSerializable
{
    public function __construct(
        public readonly string $text,
        public readonly int $sentences,
        public readonly int $words,
        public readonly int $syllables,
        public readonly int $characters,
        public readonly float $score
    ) {
    }

    /**
     * Average number of words per sentence, rounded to two decimals.
     *
     * @return float 0.0 when there are no sentences (avoids a division by zero)
     */
    public function getAverageWordsPerSentence(): float
    {
        if ($this->sentences === 0) {
            return 0.0;
        }

        return round($this->words / $this->sentences, 2);
    }

    /**
     * Average number of syllables per word, rounded to two decimals.
     *
     * @return float 0.0 when there are no words (avoids a division by zero)
     */
    public function getAverageSyllablesPerWord(): float
    {
        if ($this->words === 0) {
            return 0.0;
        }

        return round($this->syllables / $this->words, 2);
    }

    /**
     * Serializes the counts, both averages and the score; the analysed text
     * itself is not part of the output.
     *
     * @return array{sentences: int, words: int, syllables: int, characters: int, avgSyllables: float, avgWords: float, score: float}
     */
    public function jsonSerialize(): array
    {
        return [
            'sentences' => $this->sentences,
            'words' => $this->words,
            'syllables' => $this->syllables,
            'characters' => $this->characters,
            'avgSyllables' => $this->getAverageSyllablesPerWord(),
            'avgWords' => $this->getAverageWordsPerSentence(),
            'score' => $this->score,
        ];
    }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Service;
6+
7+
/**
 * Placeholder service; it declares no members yet.
 */
final class ReadingEaseService
{
}

0 commit comments

Comments
 (0)