|
4 | 4 | from .HTMLparsers import getSchiHubPDF, SciHubUrls |
5 | 5 | import random |
6 | 6 | from .NetInfo import NetInfo |
| 7 | +from .Utils import URLjoin |
| 8 | + |
7 | 9 |
|
8 | 10 | # Import enhanced downloader for improved experience |
9 | 11 | try: |
|
14 | 16 | print("Enhanced downloader not available. Install pySmartDL for better downloading experience.") |
15 | 17 |
|
16 | 18 | def setSciHubUrl(): |
| 19 | + print("Searching for a sci-hub mirror") |
17 | 20 | r = requests.get(NetInfo.SciHub_URLs_repo, headers=NetInfo.HEADERS) |
18 | 21 | links = SciHubUrls(r.text) |
19 | | - found = False |
20 | 22 |
|
21 | 23 | for l in links: |
22 | 24 | try: |
| 25 | + print("Trying with {}...".format(l)) |
23 | 26 | r = requests.get(l, headers=NetInfo.HEADERS) |
24 | 27 | if r.status_code == 200: |
25 | | - found = True |
26 | 28 | NetInfo.SciHub_URL = l |
27 | 29 | break |
28 | 30 | except: |
29 | 31 | pass |
30 | | - if found: |
31 | | - print("\nUsing {} as Sci-Hub instance".format(NetInfo.SciHub_URL)) |
32 | 32 | else: |
33 | | - print("\nNo working Sci-Hub instance found!\nIf in your country Sci-Hub is not available consider using a VPN or a proxy") |
| 33 | + print( |
| 34 | +        "\nNo working Sci-Hub instance found!\nIf in your country Sci-Hub is not available consider using a VPN or a proxy\nYou can use a specific mirror with the --scihub-mirror argument")
34 | 35 | NetInfo.SciHub_URL = "https://sci-hub.st" |
35 | 36 |
|
36 | 37 |
|
37 | 38 | def getSaveDir(folder, fname): |
38 | 39 | dir_ = path.join(folder, fname) |
39 | 40 | n = 1 |
40 | 41 | while path.exists(dir_): |
41 | | - n += 1 |
42 | | - dir_ = path.join(folder, "("+str(n)+")"+fname) |
| 42 | + n += 1 |
| 43 | + dir_ = path.join(folder, f"({n}){fname}") |
43 | 44 |
|
44 | 45 | return dir_ |
45 | 46 |
|
46 | | -def saveFile(file_name,content, paper,dwn_source): |
| 47 | + |
| 48 | +def saveFile(file_name, content, paper, dwn_source): |
47 | 49 | f = open(file_name, 'wb') |
48 | 50 | f.write(content) |
49 | 51 | f.close() |
@@ -86,49 +88,59 @@ def URLjoin(*args): |
86 | 88 | return "/".join(map(lambda x: str(x).rstrip('/'), args)) |
87 | 89 |
|
88 | 90 | NetInfo.SciHub_URL = SciHub_URL |
89 | | - if NetInfo.SciHub_URL==None: |
| 91 | + if NetInfo.SciHub_URL is None: |
90 | 92 | setSciHubUrl() |
| 93 | + if SciDB_URL is not None: |
| 94 | + NetInfo.SciDB_URL = SciDB_URL |
| 95 | + |
| 96 | + print("\nUsing Sci-Hub mirror {}".format(NetInfo.SciHub_URL)) |
| 97 | + print("Using Sci-DB mirror {}".format(NetInfo.SciDB_URL)) |
| 98 | +    print("You can use --scihub-mirror and --scidb-mirror to specify your desired mirror URL\n")
91 | 99 |
|
92 | 100 | num_downloaded = 0 |
93 | 101 | paper_number = 1 |
94 | 102 | paper_files = [] |
95 | 103 | for p in papers: |
96 | | - if p.canBeDownloaded() and (num_limit==None or num_downloaded<num_limit): |
97 | | - print("Download {} of {} -> {}".format(paper_number, scholar_results, p.title)) |
| 104 | + if p.canBeDownloaded() and (num_limit is None or num_downloaded < num_limit): |
| 105 | + print("Download {} of {} -> {}".format(paper_number, len(papers), p.title)) |
98 | 106 | paper_number += 1 |
99 | 107 |
|
100 | 108 | pdf_dir = getSaveDir(dwnl_dir, p.getFileName()) |
101 | 109 |
|
102 | | - faild = 0 |
103 | | - while p.downloaded==False and faild!=4: |
| 110 | + failed = 0 |
| 111 | + url = "" |
| 112 | + while not p.downloaded and failed != 5: |
104 | 113 | try: |
105 | | - dwn_source = 1 #1 scihub 2 scholar |
106 | | - if faild==0 and p.DOI!=None: |
| 114 | + dwn_source = 1 # 1 scidb - 2 scihub - 3 scholar |
| 115 | + if failed == 0 and p.DOI is not None: |
| 116 | + url = URLjoin(NetInfo.SciDB_URL, p.DOI) |
| 117 | + if failed == 1 and p.DOI is not None: |
107 | 118 | url = URLjoin(NetInfo.SciHub_URL, p.DOI) |
108 | | - if faild==1 and p.scholar_link!=None: |
| 119 | + dwn_source = 2 |
| 120 | + if failed == 2 and p.scholar_link is not None: |
109 | 121 | url = URLjoin(NetInfo.SciHub_URL, p.scholar_link) |
110 | | - if faild==2 and p.scholar_link!=None and p.scholar_link[-3:]=="pdf": |
| 122 | + if failed == 3 and p.scholar_link is not None and p.scholar_link[-3:] == "pdf": |
111 | 123 | url = p.scholar_link |
112 | | - dwn_source = 2 |
113 | | - if faild==3 and p.pdf_link!=None: |
| 124 | + dwn_source = 3 |
| 125 | + if failed == 4 and p.pdf_link is not None: |
114 | 126 | url = p.pdf_link |
115 | | - dwn_source = 2 |
| 127 | + dwn_source = 3 |
116 | 128 |
|
117 | | - if url!="": |
| 129 | + if url != "": |
118 | 130 | r = requests.get(url, headers=NetInfo.HEADERS) |
119 | 131 | content_type = r.headers.get('content-type') |
120 | 132 |
|
121 | | - if dwn_source==1 and 'application/pdf' not in content_type: |
122 | | - time.sleep(random.randint(1,5)) |
| 133 | + if (dwn_source == 1 or dwn_source == 2) and 'application/pdf' not in content_type and "application/octet-stream" not in content_type: |
| 134 | + time.sleep(random.randint(1, 4)) |
123 | 135 |
|
124 | 136 | pdf_link = getSchiHubPDF(r.text) |
125 | | - if(pdf_link != None): |
| 137 | + if pdf_link is not None: |
126 | 138 | r = requests.get(pdf_link, headers=NetInfo.HEADERS) |
127 | 139 | content_type = r.headers.get('content-type') |
128 | 140 |
|
129 | | - if 'application/pdf' in content_type: |
130 | | - paper_files.append(saveFile(pdf_dir,r.content,p,dwn_source)) |
| 141 | + if 'application/pdf' in content_type or "application/octet-stream" in content_type: |
| 142 | + paper_files.append(saveFile(pdf_dir, r.content, p, dwn_source)) |
131 | 143 | except Exception: |
132 | 144 | pass |
133 | 145 |
|
134 | | - faild += 1 |
| 146 | + failed += 1 |
0 commit comments