Skip to content

Commit e4efc87

Browse files
authored
Update readability.py
1 parent b20d5c1 commit e4efc87

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

readability/readability.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,13 @@ def __init__(self, input, positive_keywords=None, negative_keywords=None,
9797
negative_keywords=["mysidebar", "related", "ads"]
9898
9999
The Document class is not reentrant.
100-
You need to create a new Document() for each HTML file to process.
100+
Create a new Document() for each HTML file you want to process.
101101
102-
Provides four API methods:
103-
.get_title()
104-
.short_title()
105-
.get_content()
106-
.summary()
102+
API methods:
103+
.title() -- full title
104+
.short_title() -- cleaned up title
105+
.content() -- full content
106+
.summary() -- cleaned up content
107107
"""
108108
self.input = input
109109
self.html = None
@@ -143,7 +143,7 @@ def _parse(self, input):
143143
return doc
144144

145145
def content(self):
146-
"""Returns full document body"""
146+
"""Returns document body"""
147147
return get_body(self._html(True))
148148

149149
def title(self):
@@ -168,8 +168,8 @@ def summary(self, html_partial=False):
168168
:param html_partial: return only the div of the document, don't wrap
169169
in html and body tags.
170170
171-
Warning: It mangles internal DOM representation of the HTML document,
172-
so always use other API methods before this one.
171+
Warning: It mutates the internal DOM representation of the HTML document,
172+
so call any other API methods you need before this one.
173173
"""
174174
try:
175175
ruthless = True
@@ -395,7 +395,6 @@ def score_node(self, elem):
395395
}
396396

397397
def remove_unlikely_candidates(self):
398-
"""Utility method"""
399398
for elem in self.html.iter():
400399
s = "%s %s" % (elem.get('class', ''), elem.get('id', ''))
401400
if len(s) < 2:
@@ -405,7 +404,6 @@ def remove_unlikely_candidates(self):
405404
elem.drop_tree()
406405

407406
def transform_misused_divs_into_paragraphs(self):
408-
"""Utility method"""
409407
for elem in self.tags(self.html, 'div'):
410408
# transform <div>s that do not contain other block elements into
411409
# <p>s

0 commit comments

Comments
 (0)