@@ -32,20 +32,19 @@ async def check_for_ordinance_info(
3232 Parameters
3333 ----------
3434 doc : elm.web.document.BaseDocument
35- A document potentially containing ordinance information. Note
36- that if the document's attrs contains the
37- ``"contains_ord_info"`` key, it will not be processed. To force
38- a document to be processed by this function, remove that key
39- from the documents attrs.
35+ A document instance (PDF, HTML, etc) potentially containing
36+ ordinance information. Note that if the document's ``attrs``
37+ has the ``"contains_ord_info"`` key, it will not be processed.
38+ To force a document to be processed by this function, remove
39+ that key from the document's ``attrs``.
4040 tech : str
4141 Technology of interest (e.g. "solar", "wind", etc). This is
4242 used to set up some document validation decision trees.
43- text_splitter : obj
44- Instance of an object that implements a `split_text` method.
45- The method should take text as input (str) and return a list
46- of text chunks. Langchain's text splitters should work for this
47- input.
48- usage_tracker : compass.services.usage.UsageTracker, optional
43+ text_splitter : LCTextSplitter, optional
44+ Optional Langchain text splitter (or subclass instance), or any
45+ object that implements a `split_text` method. The method should
46+ take text as input (str) and return a list of text chunks.
47+ usage_tracker : UsageTracker, optional
4948 Optional tracker instance to monitor token usage during
5049 LLM calls. By default, ``None``.
5150
@@ -131,7 +130,7 @@ async def extract_date(doc, model_config, usage_tracker=None):
131130 ----------
132131 doc : elm.web.document.BaseDocument
133132 A document potentially containing date information.
134- usage_tracker : compass.services.usage.UsageTracker, optional
133+ usage_tracker : UsageTracker, optional
135134 Optional tracker instance to monitor token usage during
136135 LLM calls. By default, ``None``.
137136
@@ -165,20 +164,17 @@ async def extract_ordinance_text_with_llm(
165164 doc : elm.web.document.BaseDocument
166165 A document known to contain ordinance information. This means it
167166 must contain an ``"ordinance_text"`` key in the attrs. You can
168- run :func:`~compass.extraction.apply.check_for_ordinance_info`
167+ run :func:`check_for_ordinance_info`
169168 to have this attribute populated automatically for documents
170169 that are found to contain ordinance data. Note that if the
171170 document's attrs does not contain the ``"ordinance_text"``
172171 key, you will get an error.
173- text_splitter : obj
174- Instance of an object that implements a `split_text` method.
175- The method should take text as input (str) and return a list
176- of text chunks. Langchain's text splitters should work for this
177- input.
178- extractor : compass.extraction.ordinance.WindOrdinanceTextExtractor
179- Instance of
180- :class:`~compass.extraction.ordinance.WindOrdinanceTextExtractor`
181- used for ordinance text extraction.
172+ text_splitter : LCTextSplitter, optional
173+ Optional Langchain text splitter (or subclass instance), or any
174+ object that implements a `split_text` method. The method should
175+ take text as input (str) and return a list of text chunks.
176+ extractor : WindOrdinanceTextExtractor
177+ Object used for ordinance text extraction.
182178 original_text_key : str
183179 String corresponding to the `doc.attrs` key containing the
184180 original text (before extraction).
@@ -237,11 +233,10 @@ async def extract_ordinance_text_with_ngram_validation(
237233 that are found to contain ordinance data. Note that if the
238234 document's attrs does not contain the ``"ordinance_text"``
239235 key, it will not be processed.
240- text_splitter : obj
241- Instance of an object that implements a `split_text` method.
242- The method should take text as input (str) and return a list
243- of text chunks. Langchain's text splitters should work for this
244- input.
236+ text_splitter : LCTextSplitter, optional
237+ Optional Langchain text splitter (or subclass instance), or any
238+ object that implements a `split_text` method. The method should
239+ take text as input (str) and return a list of text chunks.
245240 original_text_key : str
246241 String corresponding to the `doc.attrs` key containing the
247242 original text (before extraction).
0 commit comments