From e22996e6ac99e5d38dd8ac9a0102d2f4d041e786 Mon Sep 17 00:00:00 2001
From: Tron <ironboxplus@gmail.com>
Date: Mon, 7 Oct 2024 15:27:38 +0800
Subject: [PATCH] Update fofaMain.py

---
 core/fofaMain.py | 137 +++++++++++++++++++++--------------------------
 1 file changed, 60 insertions(+), 77 deletions(-)

diff --git a/core/fofaMain.py b/core/fofaMain.py
index e4f0eea0..dacfd9a0 100644
--- a/core/fofaMain.py
+++ b/core/fofaMain.py
@@ -1,16 +1,9 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/9/24 22:13
-# @Author  : Cl0udG0d
-# @File    : fofaMain.py
-# @Github: https://github.com/Cl0udG0d
 import json
 import os
 import random
 import sys
 import urllib
-from datetime import datetime
-from datetime import timedelta
+from datetime import datetime, timedelta
 import base64
 import time
 from urllib.parse import quote_plus
@@ -30,6 +23,7 @@
     dir = sys._MEIPASS
 else:
     dir = config.ROOT_PATH
+
 # 获取当前的语言设置
 lang, _ = locale.getdefaultlocale()
 if lang.startswith('zh'):
@@ -290,16 +284,14 @@ def resetCityKeyword(self, keyURL, key):
             if key in search_key:
                 pattern = r'{}="([^"]+)"'.format(key)
                 match = re.search(pattern, search_key)
-                city = match.group(1)
-                return city
+                if match:
+                    city = match.group(1)
+                    return city
         return None
 
     def check_authorization_is_available(self):
 
-        # au = config.AUTHORIZATION_LIST.pop()
-        # print(au)
-        # print(config.AUTHORIZATION_LIST)
-        while len(config.AUTHORIZATION_LIST)>0:
+        while len(config.AUTHORIZATION_LIST) > 0:
             config.AUTHORIZATION = config.AUTHORIZATION_LIST.pop()
 
             try:
@@ -337,7 +329,7 @@ def setIndexTimestamp(self, searchbs64, timestamp_index):
 
             request_url = getUrl(searchbs64)
             if config.DEBUG:
-                print("[+] 当前请求网址: "+request_url)
+                print("[+] 当前请求网址: " + request_url)
 
             rep = requests.get(request_url, headers=fofaUseragent.getFofaPageNumHeaders(), timeout=self.timeout,
                                proxies=self.get_proxy())
@@ -366,10 +358,7 @@ def fofaSpiderOnePageData(self, search_key, searchbs64, timestamp_index):
         获取fofa一页的数据
         :rtype: object
         """
-        # searchbs64 = searchbs64.replace("%3D", "=")
-        # init_search_key = base64.b64decode(searchbs64).decode()
         init_search_key = search_key
-        # if not config.AUTHORIZATION:
         print("\033[1;34mnow search key: {}\033[0m".format(init_search_key))
         TEMP_RETRY_NUM = 0
 
@@ -425,49 +414,47 @@ def checkDataIsUpdate(self):
 
     def fofaSpider(self, search_key, searchbs64, index):
         """
-        爬取某关键字的fofa数据
-        @param search_key:
-        @param searchbs64:
-        @param index:
-        @return:
+        迭代方式爬取 Fofa 数据，避免递归调用导致的栈溢出。
         """
-        # while len(self.host_set) < self.endcount and self.old_length !=len(self.host_set):
-        self.old_length = len(self.host_set)
-        self.timestamp_list[index].clear()
-        context = self.fofaSpiderOnePageData(search_key, searchbs64, index)
+        stack = [(search_key, searchbs64, index)]
 
-        if self.EXIT_FLAG:
-            return
+        while stack and not self.EXIT_FLAG:
+            current_key, current_bs64, current_index = stack.pop()
+            self.old_length = len(self.host_set)
+            self.timestamp_list[current_index].clear()
+            context = self.fofaSpiderOnePageData(current_key, current_bs64, current_index)
 
-        if len(self.host_set) >= self.endcount:
-            print(colorize(_("[*] 在{}节点,数据爬取结束").format(index), "green"))
-            if self.output == 'txt':
-                finalint = self.removeDuplicate()
-                print(colorize(_('[*] 去重结束，最终数据 {} 条').format(str(finalint)), "green"))
+            if self.EXIT_FLAG:
+                break
+
+            if len(self.host_set) >= self.endcount:
+                print(colorize(_("[*] 在{}节点,数据爬取结束").format(current_index), "green"))
+                if self.output == 'txt':
+                    finalint = self.removeDuplicate()
+                    print(colorize(_('[*] 去重结束，最终数据 {} 条').format(str(finalint)), "green"))
+                else:
+                    print(colorize(_('[*] 输出类型为其他,不进行去重操作 '), "green"))
+                self.EXIT_FLAG = True
+                break
+
+            if self.old_length == len(self.host_set):
+                self.no_new_data_count += 1
+                if self.no_new_data_count == 2:
+                    print(colorize(_("[-] {}节点数据无新增,该节点枯萎").format(current_index), "red"))
+                    continue
             else:
-                print(colorize(_('[*] 输出类型为其他,不进行去重操作 '), "green"))
-            self.EXIT_FLAG = True
-            return
-        if self.old_length == len(self.host_set):
-            self.no_new_data_count += 1
-            if self.no_new_data_count == 2:
-                print(colorize(_("[-] {}节点数据无新增,该节点枯萎").format(index), "red"))
-                return
-        else:
-            self.no_new_data_count = 0
+                self.no_new_data_count = 0
 
-        if self.fuzz and not self.EXIT_FLAG:
-            self.fofaFuzzSpider(search_key, context, index)
+            if self.fuzz and not self.EXIT_FLAG:
+                new_tasks = self.fofaFuzzSpider(current_key, context, current_index)
+                stack.extend(new_tasks)  # 将新任务添加到栈中
 
-        search_key_modify = self.modifySearchTimeUrl(search_key, index)
-        # 特判，如果destroy不exit的话就会出错
-        if search_key_modify == 'end':
-            return
-        # print(search_key_modify)
-        searchbs64_modify = urllib.parse.quote(base64.b64encode(search_key_modify.encode("utf-8")))
-        # search_key = search_key_modify
-        # searchbs64 = searchbs64_modify
-        self.fofaSpider(search_key_modify, searchbs64_modify, index)
+            search_key_modify = self.modifySearchTimeUrl(current_key, current_index)
+            if search_key_modify == 'end':
+                continue
+
+            searchbs64_modify = urllib.parse.quote(base64.b64encode(search_key_modify.encode("utf-8")))
+            stack.append((search_key_modify, searchbs64_modify, current_index))
 
     def isPortInKeyword(self):
         """
@@ -481,7 +468,7 @@ def isPortInKeyword(self):
                 return False
         if "host" in self.search_key:
             result = re.findall('host="(.*?)"', self.search_key)
-            if len(result) > 0 and ":" in result:
+            if len(result) > 0 and ":" in result[0]:
                 return False
         return True
 
@@ -498,16 +485,16 @@ def fuzzListAdd(self):
 
     def fofaFuzzSpider(self, search_key, context, index):
         """
-        递归调用 fofaSpider 方法不断 fuzz
-        @param search_key:
-        @param searchbs64:
-        @return:
+        生成新的 fuzz 任务，而不是递归调用 fofaSpider。
+        返回一个包含新任务的列表。
         """
 
         '''
             fuzz部分
         '''
         FUZZ_LIST = ["country", "org", "asn", "port"]
+        new_tasks = []
+
         for fuzzKey in FUZZ_LIST:
             if fuzzKey not in search_key:
                 if fuzzKey == "country":
@@ -520,24 +507,22 @@ def fofaFuzzSpider(self, search_key, context, index):
                     dataList = self.bypassPort(context, index)
                 else:
                     dataList = []
-                # country_list = self.bypassCountry(context, index)
+
                 for data in dataList:
-                    new_key = search_key + ' && {}="{}"'.format(fuzzKey, data)
-                    # print("new_key: "+new_key)
+                    new_key = search_key + f' && {fuzzKey}="{data}"'
                     searchbs64_modify = urllib.parse.quote(base64.b64encode(new_key.encode("utf-8")))
                     self.fuzzListAdd()
                     self.setIndexTimestamp(searchbs64_modify, self.timestamp_index)
-                    # self.fofaSpiderOnePageData(search_key,searchbs64_modify,self.timestamp_index)
-                    self.fofaSpider(new_key, searchbs64_modify, self.timestamp_index)
+                    new_tasks.append((new_key, searchbs64_modify, self.timestamp_index))
 
                 for data in dataList:
-                    new_key = search_key + ' && {}!="{}"'.format(fuzzKey, data)
-                    # print("new_key: "+new_key)
+                    new_key = search_key + f' && {fuzzKey}!="{data}"'
                     searchbs64_modify = urllib.parse.quote(base64.b64encode(new_key.encode("utf-8")))
                     self.fuzzListAdd()
                     self.setIndexTimestamp(searchbs64_modify, self.timestamp_index)
-                    # self.fofaSpiderOnePageData(search_key,searchbs64_modify,self.timestamp_index)
-                    self.fofaSpider(new_key, searchbs64_modify, self.timestamp_index)
+                    new_tasks.append((new_key, searchbs64_modify, self.timestamp_index))
+
+        return new_tasks
 
     def modifySearchTimeUrl(self, search_key, index):
         """
@@ -554,7 +539,8 @@ def modifySearchTimeUrl(self, search_key, index):
             if "before=" in search_key:
                 pattern = r'before="([^"]+)"'
                 match = re.search(pattern, search_key)
-                before_time_in_search_key = match.group(1)
+                if match:
+                    before_time_in_search_key = match.group(1)
             time_before_time_in_search_key = datetime.strptime(before_time_in_search_key, "%Y-%m-%d").date()
             # print(self.timestamp_list)
             # print(index)
@@ -594,7 +580,8 @@ def modifySearchTimeUrl(self, search_key, index):
             if "before=" in search_key:
                 pattern = r'before="([^"]+)"'
                 match = re.search(pattern, search_key)
-                before_time_in_search_key = match.group(1)
+                if match:
+                    before_time_in_search_key = match.group(1)
             time_before_time_in_search_key = datetime.strptime(before_time_in_search_key, '%Y-%m-%d %H:%M:%S')
             timestamp_list = list(self.timestamp_list[index])
             timestamp_list.sort()
@@ -603,17 +590,16 @@ def modifySearchTimeUrl(self, search_key, index):
                 self._destroy()
 
             if config.DEBUG:
-                print("[-] timestamp_list:"+str(timestamp_list))
+                print("[-] timestamp_list:" + str(timestamp_list))
             time_first = timestamp_list[0].strip('\n').strip()
             if config.DEBUG:
-                print("[-] time_first: "+time_first)
+                print("[-] time_first: " + time_first)
             time_first_time = datetime.strptime(time_first, '%Y-%m-%d %H:%M:%S')
             time_before = time_first_time + timedelta(hours=1)
 
             if time_before >= time_before_time_in_search_key:
                 time_before = time_before_time_in_search_key - timedelta(hours=1)
 
-
             if 'before' in search_key:
                 search_key = search_key.split('&& before')[0]
                 search_key = search_key.strip(' ')
@@ -679,8 +665,6 @@ def start(self):
         print(colorize(_('[*] 开始运行'), "green"))
         if self.inputfile:
             with open(self.inputfile, 'r') as f:
-                # self.filename = "{}_{}.{}".format(unit.md5(self.search_key), int(time.time()), self.output)
-                # self.output_data = OutputData(self.filename, self.level, pattern=self.output)
                 for line in f.readlines():
                     self.cleanInitParameters()
                     self.search_key = clipKeyWord(line.strip())
@@ -712,7 +696,6 @@ def start(self):
     def _destroy(self):
         self.removeDuplicate()
         if not self.inputfile:
-            
             sys.exit(0)