@@ -97,13 +97,13 @@ def __init__(self, input, positive_keywords=None, negative_keywords=None,
9797 negative_keywords=["mysidebar", "related", "ads"]
9898
9999 The Document class is not re-enterable.
100- You need to create a new Document() for each HTML file to process.
100+ Create a new Document() for each HTML file you need to process.
101101
102- Provides four API methods:
103- .get_title()
104- .short_title()
105- .get_content()
106- .summary()
102+ API methods:
103+ .title() -- full title
104+ .short_title() -- cleaned up title
105+ .content() -- full content
106+ .summary() -- cleaned up content
107107 """
108108 self .input = input
109109 self .html = None
@@ -143,7 +143,7 @@ def _parse(self, input):
143143 return doc
144144
145145 def content (self ):
146- """Returns full document body"""
146+ """Returns document body"""
147147 return get_body (self ._html (True ))
148148
149149 def title (self ):
@@ -168,8 +168,8 @@ def summary(self, html_partial=False):
168168 :param html_partial: return only the div of the document, don't wrap
169169 in html and body tags.
170170
171- Warning: It mangles internal DOM representation of the HTML document,
172- so always use other API methods before this one.
171+ Warning: It mutates the internal DOM representation of the HTML document,
172+ so it is better to call other API methods before this one.
173173 """
174174 try :
175175 ruthless = True
@@ -395,7 +395,6 @@ def score_node(self, elem):
395395 }
396396
397397 def remove_unlikely_candidates (self ):
398- """Utility method"""
399398 for elem in self .html .iter ():
400399 s = "%s %s" % (elem .get ('class' , '' ), elem .get ('id' , '' ))
401400 if len (s ) < 2 :
@@ -405,7 +404,6 @@ def remove_unlikely_candidates(self):
405404 elem .drop_tree ()
406405
407406 def transform_misused_divs_into_paragraphs (self ):
408- """Utility method"""
409407 for elem in self .tags (self .html , 'div' ):
410408 # transform <div>s that do not contain other block elements into
411409 # <p>s
0 commit comments