|
6 | 6 | from data_scraper.common import constants |
7 | 7 | from data_scraper.core.scraper import JiraScraper, OSPDocScraper |
8 | 8 | from data_scraper.core.errata_scraper import ErrataScraper |
| 9 | +from data_scraper.core.solutions_scraper import SolutionsScraper |
9 | 10 |
|
10 | 11 | logging.basicConfig( |
11 | 12 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" |
@@ -178,3 +179,49 @@ def errata_scraper() -> None: |
178 | 179 |
|
179 | 180 | scraper = ErrataScraper(config_args) |
180 | 181 | scraper.run() |
| 182 | + |
def solutions_scraper() -> None:
    """Entry point for command line execution.

    Parses command-line options, assembles a flat configuration dict, and
    runs a ``SolutionsScraper`` against the configured database and LLM
    endpoints.
    """

    def _str_to_bool(value: str) -> bool:
        # argparse's `type=bool` is a classic trap: bool() on a raw string
        # returns True for ANY non-empty value, including "False" and "0".
        # Parse the string explicitly so "--recreate_collection False"
        # actually disables recreation.
        return value.strip().lower() in ("true", "1", "yes", "y")

    parser = ArgumentParser("solutions_scraper")

    # Required arguments
    parser.add_argument("--database_client_url", type=str, required=True)
    parser.add_argument("--llm_server_url", type=str, required=True)
    parser.add_argument("--llm_api_key", type=str, required=True)
    parser.add_argument("--database_api_key", type=str, required=True)
    # NOTE(review): flag naming mixes hyphens and underscores; argparse maps
    # hyphenated flags to underscore attributes (args.solutions_token), so
    # access below works. Flag spellings are kept as-is for CLI compatibility.
    parser.add_argument("--solutions-token", type=str, required=True)

    # Optional arguments
    parser.add_argument("--solutions-url", type=str,
                        default=constants.DEFAULT_SOLUTIONS_PUBLIC_URL)
    parser.add_argument("--max_results", type=int,
                        default=constants.SOLUTIONS_MAX_RESULTS)
    parser.add_argument("--chunk_size", type=int,
                        default=constants.DEFAULT_CHUNK_SIZE)
    parser.add_argument("--embedding_model", type=str,
                        default=constants.DEFAULT_EMBEDDING_MODEL)
    parser.add_argument("--db_collection_name", type=str,
                        default=constants.SOLUTIONS_COLLECTION_NAME)
    parser.add_argument("--product_name", type=str,
                        default=constants.SOLUTIONS_PRODUCT_NAME)
    # BUG FIX: was `type=bool`, which treated every non-empty string
    # (including "False") as True, making the flag impossible to disable.
    parser.add_argument("--recreate_collection", type=_str_to_bool,
                        default=True,
                        help="Recreate database collection from scratch.")
    args = parser.parse_args()

    config_args = {
        "database_client_url": args.database_client_url,
        "llm_server_url": args.llm_server_url,
        "llm_api_key": args.llm_api_key,
        "database_api_key": args.database_api_key,
        "chunk_size": args.chunk_size,
        "embedding_model": args.embedding_model,
        "db_collection_name": args.db_collection_name,
        "solutions_url": args.solutions_url,
        "solutions_token": args.solutions_token,
        "product_name": args.product_name,
        "max_results": args.max_results,
        "recreate_collection": args.recreate_collection,
    }

    scraper = SolutionsScraper(config_args)
    scraper.run()
0 commit comments