ReadAsMyGo/content.js at master · Paper-blue/ReadAsMyGo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Module-level state shared by every function in this content script.
// The settings below are overwritten by each "toggleReading" message.

// True while the page is showing the enhanced (bolded) rendering.
let isEnhancedReadingActive = false;
// Boldness preset: 'light' scales the base ratios by 0.7, 'heavy' by 1.3,
// any other value (e.g. 'medium') leaves them unscaled.
let boldnessLevel = 'medium';
// Fraction of each English word (leading characters) that gets wrapped in <b>.
let englishBoldness = 0.5;
// Base probability used when deciding whether a candidate Chinese word is bolded.
let chineseBoldness = 0.2;
let maxBoldWordsPerSentence = 3;  // Added: maximum number of bolded words per sentence

// Entries of common_words.json that are longer than one character.
// NOTE(review): only populated and logged in the visible code; nothing reads it here.
let commonWords = new Set();

// Maps each entry of common_words.json to a score (see the loader below);
// used for Chinese word segmentation and for ranking words to bold.
let wordFrequencyData = {};

// Load the bundled word list once at script start. Until this resolves both
// structures above are empty, so segmentation falls back to single characters.
fetch(chrome.runtime.getURL('common_words.json'))
  .then(response => response.json())
  .then(data => {
    commonWords = new Set(data.filter(word => word.length > 1));
    console.log('Common words loaded:', commonWords.size);
    // score = data.length - index, so entries earlier in the list score higher.
    wordFrequencyData = data.reduce((acc, word, index) => {
      acc[word] = data.length - index; // higher word frequency => higher score (presumably the list is ordered most-frequent first; verify against common_words.json)
      return acc;
    }, {});
    console.log('Word frequency data loaded:', Object.keys(wordFrequencyData).length);
  })
  .catch(error => console.error('Error loading common words:', error));

// Full-width / CJK punctuation marks (。，、；：？！“”‘’（）【】《》).
// processChinese treats any fragment found inside this string as punctuation.
const punctuations = '\u3002\uFF0C\u3001\uFF1B\uFF1A\uFF1F\uFF01\u201C\u201D\u2018\u2019\uFF08\uFF09\u3010\u3011\u300A\u300B';

// Handle the "toggleReading" command: flip the active flag, adopt the settings
// carried by the message, apply or undo the page transformation, and report
// the resulting state back to the sender. Other actions are ignored.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.action !== "toggleReading") {
    return;
  }

  const {
    boldness,
    englishBoldness: englishRatio,
    chineseBoldness: chineseRatio,
    maxBoldWordsPerSentence: boldWordLimit,
  } = request;

  isEnhancedReadingActive = !isEnhancedReadingActive;
  boldnessLevel = boldness;
  englishBoldness = englishRatio;
  chineseBoldness = chineseRatio;
  maxBoldWordsPerSentence = boldWordLimit;

  if (!isEnhancedReadingActive) {
    resetReading();
  } else {
    enhancedReading();
  }

  sendResponse({isActive: isEnhancedReadingActive});
  return true;
});

/**
 * Replace every non-blank text node under <body> with a
 * <span class="enhanced-reading"> holding the markup produced by processText.
 *
 * Details that matter:
 * - The XPath is relative (".//"). An absolute "//text()" is evaluated from
 *   the document root even when the context node is <body>, which would also
 *   rewrite <head> content such as <title>.
 * - Text inside <script>, <style>, <noscript> and <textarea> is left alone:
 *   it is code or raw form state, not rendered prose, and wrapping it in an
 *   element breaks it.
 * - The text is passed to processText untrimmed so that the whitespace
 *   separating it from neighbouring inline elements is preserved.
 */
function enhancedReading() {
  const skippedParents = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'TEXTAREA']);

  const textNodes = document.evaluate(
    './/text()[normalize-space()]',
    document.body,
    null,
    XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
    null
  );

  for (let i = 0; i < textNodes.snapshotLength; i++) {
    const node = textNodes.snapshotItem(i);
    const parent = node.parentNode;

    // nodeName is upper-case for HTML elements but not for XHTML/SVG ones.
    if (!parent || skippedParents.has(parent.nodeName.toUpperCase())) {
      continue;
    }

    const text = node.textContent;
    if (text.trim().length === 0) {
      continue;
    }

    const span = document.createElement('span');
    span.className = 'enhanced-reading';
    // processText returns an HTML string (it adds <b> tags), hence innerHTML.
    span.innerHTML = processText(text);
    parent.replaceChild(span, node);
  }
}

/**
 * Escape the characters that are significant in HTML element content.
 * Private helper of processText.
 *
 * @param {string} segment - Plain text.
 * @returns {string} The text with &, < and > replaced by entities.
 */
function escapePlainSegment(segment) {
  return segment
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Turn plain text into an HTML string with emphasis markup.
 *
 * The text is split on whitespace runs (which are kept as their own
 * segments). A segment containing a CJK ideograph (U+4E00–U+9FA5) goes to
 * processChinese, otherwise a segment containing an ASCII letter goes to
 * processEnglish. Everything else (whitespace, digits, punctuation) is
 * emitted HTML-escaped, because the caller assigns the result to innerHTML
 * and raw `&`, `<` or `>` would be reinterpreted as markup.
 *
 * @param {string} text - Plain text of a single text node.
 * @returns {string} HTML string.
 */
function processText(text) {
  const segments = text.split(/(\s+)/);
  return segments.map(segment => {
    if (/[\u4e00-\u9fa5]/.test(segment)) {
      // Chinese handling
      return processChinese(segment);
    }
    if (/[a-zA-Z]/.test(segment)) {
      // English handling
      return processEnglish(segment);
    }
    // Other characters (e.g. punctuation, digits, whitespace)
    return escapePlainSegment(segment);
  }).join('');
}

/**
 * Escape the characters that are significant in HTML element content.
 * Private helper of processChinese.
 *
 * @param {string} word - Plain text.
 * @returns {string} The text with &, < and > replaced by entities.
 */
function escapeChineseWord(word) {
  return word
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Build the HTML for a whitespace-free segment that contains Chinese text.
 *
 * The segment is split on 。，、；：？！ (delimiters are kept). For each
 * fragment between delimiters:
 * - the first word after the start of the segment or after punctuation is
 *   always bolded and counts toward maxBoldWordsPerSentence;
 * - of the remaining words, only the maxBoldWordsPerSentence highest-scoring
 *   ones (by wordFrequencyData) are candidates, and each candidate is bolded
 *   at random via shouldBoldChinese, so the boldnessLevel preset affects
 *   Chinese the same way it affects English.
 *
 * Every word is HTML-escaped before being emitted because the result is
 * assigned to innerHTML; unescaped page text such as "<img ...>" would
 * otherwise become live markup.
 *
 * @param {string} text - Segment containing at least one CJK ideograph.
 * @returns {string} HTML string; the output is randomized.
 */
function processChinese(text) {
  const sentences = text.split(/([。，、；：？！])/);
  let result = '';
  let isFirstWordInSentence = true;

  for (const sentence of sentences) {
    if (punctuations.includes(sentence)) {
      // Only punctuation marks (or an empty fragment) reach this branch.
      result += sentence;
      isFirstWordInSentence = true; // the word after punctuation starts a new sentence
      continue;
    }

    const words = segmentChinese(sentence);

    // Indices of the highest-scoring words; only these may be bolded at random.
    const boldIndices = new Set(
      words
        .map((word, index) => ({ index, score: wordFrequencyData[word] || 0 }))
        .sort((a, b) => b.score - a.score) // descending by frequency score
        .slice(0, maxBoldWordsPerSentence)
        .map(entry => entry.index)
    );

    let boldedWords = 0;
    words.forEach((word, j) => {
      const safeWord = escapeChineseWord(word);
      if (isFirstWordInSentence) {
        result += `<b>${safeWord}</b>`;
        isFirstWordInSentence = false;
        boldedWords++;
      } else if (boldedWords < maxBoldWordsPerSentence && boldIndices.has(j) && shouldBoldChinese(word)) {
        result += `<b>${safeWord}</b>`;
        boldedWords++;
      } else {
        result += safeWord;
      }
    });
  }

  return result;
}

/**
 * Split text into words by greedy forward maximum matching against the keys
 * of wordFrequencyData.
 *
 * At each position the longest dictionary entry of up to 4 UTF-16 code units
 * wins. When nothing matches, the token is one whole code point, so
 * characters outside the BMP (emoji, CJK Extension B) are never split into
 * lone surrogates.
 *
 * @param {string} text - Text to segment.
 * @returns {string[]} Tokens whose concatenation equals `text`.
 */
function segmentChinese(text) {
  const words = [];
  let start = 0;

  while (start < text.length) {
    // Fallback token: the full code point at `start` (1 or 2 code units).
    let longestMatch = String.fromCodePoint(text.codePointAt(start));
    const fallbackEnd = start + longestMatch.length;
    let end = fallbackEnd;

    // Forward maximum matching against the frequency table.
    for (let i = fallbackEnd; i <= start + 4 && i <= text.length; i++) {
      const candidate = text.slice(start, i);
      // Called through Object.prototype so a dictionary key cannot shadow it.
      if (Object.prototype.hasOwnProperty.call(wordFrequencyData, candidate)) {
        longestMatch = candidate;
        end = i;
      }
    }

    words.push(longestMatch);
    start = end;
  }

  return words;
}

/**
 * Randomly decide whether a Chinese word should be bolded.
 *
 * The chance is chineseBoldness, scaled by 0.7 for the 'light' preset and by
 * 1.3 for the 'heavy' preset; any other boldnessLevel leaves it unscaled.
 *
 * @param {string} word - The word under consideration (not inspected).
 * @returns {boolean} True when the word should be bolded.
 */
function shouldBoldChinese(word) {
  // Preset multiplier applied to the base probability
  let presetFactor = 1;
  if (boldnessLevel === 'light') {
    presetFactor = 0.7;
  } else if (boldnessLevel === 'heavy') {
    presetFactor = 1.3;
  }

  const threshold = chineseBoldness * presetFactor;
  return Math.random() < threshold;
}

/**
 * Apply English emphasis to every word of `text`.
 *
 * The text is split on whitespace runs; the runs themselves are copied through
 * unchanged and every other piece is handed to applyEnglishBoldness.
 *
 * @param {string} text - Text containing English words.
 * @returns {string} HTML string.
 */
function processEnglish(text) {
  let html = '';
  for (const piece of text.split(/(\s+)/)) {
    const isWhitespaceRun = /\s+/.test(piece);
    html += isWhitespaceRun ? piece : applyEnglishBoldness(piece);
  }
  return html;
}

/**
 * Escape the characters that are significant in HTML element content.
 * Private helper of applyEnglishBoldness.
 *
 * @param {string} text - Plain text.
 * @returns {string} The text with &, < and > replaced by entities.
 */
function escapeEnglishPart(text) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Wrap the leading part of a word in <b>.
 *
 * The bolded prefix is ceil(word.length * ratio) UTF-16 code units long,
 * where ratio is englishBoldness scaled by 0.7 ('light') or 1.3 ('heavy').
 * Both parts are HTML-escaped because the result is assigned to innerHTML,
 * and the cut is moved forward by one unit when it would otherwise fall
 * between the two halves of a surrogate pair.
 *
 * @param {string} word - A single whitespace-free token.
 * @returns {string} HTML of the form `<b>prefix</b>rest`.
 */
function applyEnglishBoldness(word) {
  let boldness = englishBoldness;

  // Apply the boldness preset
  switch (boldnessLevel) {
    case 'light': boldness *= 0.7; break;
    case 'heavy': boldness *= 1.3; break;
  }

  let cut = Math.ceil(word.length * boldness);

  // Keep surrogate pairs (e.g. emoji) intact on one side of the cut.
  if (cut > 0 && cut < word.length) {
    const before = word.charCodeAt(cut - 1);
    const after = word.charCodeAt(cut);
    const splitsPair =
      before >= 0xD800 && before <= 0xDBFF &&
      after >= 0xDC00 && after <= 0xDFFF;
    if (splitsPair) {
      cut += 1;
    }
  }

  const boldPart = escapeEnglishPart(word.slice(0, cut));
  const restPart = escapeEnglishPart(word.slice(cut));
  return `<b>${boldPart}</b>${restPart}`;
}

/**
 * Undo the enhancement: replace every element with the class
 * "enhanced-reading" by a plain text node carrying that element's text.
 */
function resetReading() {
  for (const wrapper of document.querySelectorAll('.enhanced-reading')) {
    const plainText = document.createTextNode(wrapper.textContent);
    const container = wrapper.parentNode;
    container.replaceChild(plainText, wrapper);
  }
}