@@ -409,6 +409,54 @@ def decode_unicode_escape(value):
409409if (__version_info__ [3 ] is None ):
410410 __version__ = str (__version_info__ [0 ]) + "." + str (__version_info__ [1 ]) + "." + str (__version_info__ [2 ])
411411
412+ # ===== Module-level type code table & helpers (reuse anywhere) =====
413+
# Numeric 'ftype' codes stored in archive entries, keyed by symbolic name.
# Code 12 is not assigned in this table.
FT = {
    "FILE": 0,
    "HARDLINK": 1,
    "SYMLINK": 2,
    "CHAR": 3,
    "BLOCK": 4,
    "DIR": 5,
    "FIFO": 6,
    "FILE_ALT": 7,  # treated like regular file
    "SOCK": 8,
    "DOOR": 9,
    "PORT": 10,
    "WHT": 11,
    "JUNCTION": 13,
}

# Base category for each concrete ftype (no unions here).
# Keys are looked up through FT rather than repeated as bare integers so the
# two tables cannot drift apart if a code is ever renumbered.
BASE_CATEGORY_BY_CODE = {
    FT["FILE"]: "files",
    FT["FILE_ALT"]: "files",
    FT["HARDLINK"]: "hardlinks",
    FT["SYMLINK"]: "symlinks",
    FT["CHAR"]: "character",
    FT["BLOCK"]: "block",
    FT["DIR"]: "directories",
    FT["FIFO"]: "fifo",
    FT["SOCK"]: "sockets",
    FT["DOOR"]: "doors",
    FT["PORT"]: "ports",
    FT["WHT"]: "whiteouts",
    FT["JUNCTION"]: "junctions",
}

# Union categories defined by which base codes should populate them.
# The code sets are frozensets: they are shared module-level data and are
# only ever used for membership tests, so they should not be mutable.
UNION_RULES = [
    ("links", frozenset([FT["HARDLINK"], FT["SYMLINK"]])),
    ("devices", frozenset([FT["CHAR"], FT["BLOCK"]])),
]

# Deterministic category order (handy for consistent output/printing).
# Base categories come first, followed by the union categories.
CATEGORY_ORDER = [
    "files", "hardlinks", "symlinks", "character", "block",
    "directories", "fifo", "sockets", "doors", "ports",
    "whiteouts", "junctions", "links", "devices",
]
459+
412460# Robust bitness detection
413461# Works on Py2 & Py3, all platforms
414462
@@ -7737,72 +7785,122 @@ def ListDirToArray(infiles, dirlistfromtxt=False, fmttype=__file_format_default_
77377785 return listarrayfiles
77387786
77397787
7788+ # ===== Function (keeps inarray schema; returns entries + indexes) =====
7789+
def CatFileArrayToArrayIndex(inarray, returnfp=False):
    """
    Build name/id lookup indexes over an archive listing.

    The input dict is referenced from the result, never copied or modified.
    Python 2/3 compatible, no external deps.

    Input:
      inarray: dict with:
        - 'ffilelist': list of dicts, each with 'fname', 'fid' and 'ftype'
        - optional 'fnumfiles': expected entry count. It is advisory only:
          every entry actually present in 'ffilelist' is indexed, so a stale
          or wrong 'fnumfiles' can never truncate the index.
        - optional 'fp': passed through only when returnfp is true
      returnfp: when true, the result's 'fp' is inarray.get('fp');
        otherwise it is None.

    Returns:
      False if inarray is not a dict or is an empty dict. Otherwise:
      {
        'list': inarray,                    # same object as the input
        'fp': inarray.get('fp') if returnfp else None,
        'entries': { fid: {'name': fname, 'type': ftype} },
        'indexes': {
          'by_name': { fname: fid },
          'by_type': {
            <category>: {
              'by_name': { fname: fid },
              'by_id':   { fid: fname },
              'count': <number of distinct names in this category>
            }, ...                          # one bucket per CATEGORY_ORDER item
          }
        },
        'counts': {
          'total': <number of 'ffilelist' items examined, skipped ones included>,
          'by_type': { <category>: <int>, ... }
        },
        'unknown_types': { <ftype>: [fname, ...] }   # ftypes with no category
      }

    Entries that are not dicts, or whose 'fname', 'fid' or 'ftype' is missing
    or None, are skipped. When two entries share a name or an id, the later
    entry overwrites the earlier one in the affected mapping.
    """
    if not isinstance(inarray, dict) or not inarray:
        return False

    # One bucket per known category, so consumers can rely on every key
    # existing even when the archive has no member of that type.
    by_type = {}
    for cat in CATEGORY_ORDER:
        by_type[cat] = {"by_name": {}, "by_id": {}, "count": 0}

    entries = {}
    by_name = {}
    unknown_types = {}
    # Per-ftype set of names already recorded in unknown_types; keeps the
    # lists duplicate-free without rescanning them for every entry.
    unknown_seen = {}

    def _add(cat, name, fid):
        bucket = by_type[cat]
        bucket["by_name"][name] = fid
        bucket["by_id"][fid] = name

    examined = 0
    for entry in (inarray.get("ffilelist") or []):
        examined += 1
        if not isinstance(entry, dict):
            continue
        name = entry.get("fname")
        fid = entry.get("fid")
        ftype = entry.get("ftype")
        if name is None or fid is None or ftype is None:
            continue

        # Canonical record keyed by fid, plus the global name -> id index.
        entries[fid] = {"name": name, "type": ftype}
        by_name[name] = fid

        base_cat = BASE_CATEGORY_BY_CODE.get(ftype)
        if base_cat is not None:
            _add(base_cat, name, fid)
        else:
            # Track unknown codes for visibility/forward-compat.
            seen = unknown_seen.setdefault(ftype, set())
            if name not in seen:
                seen.add(name)
                unknown_types.setdefault(ftype, []).append(name)

        # Union categories (e.g. 'links', 'devices') aggregate several codes.
        for union_name, code_set in UNION_RULES:
            if ftype in code_set:
                _add(union_name, name, fid)

    # A bucket's count is its number of distinct names.
    counts_by_type = {}
    for cat in CATEGORY_ORDER:
        bucket = by_type[cat]
        bucket["count"] = len(bucket["by_name"])
        counts_by_type[cat] = bucket["count"]

    return {
        "list": inarray,
        "fp": inarray.get("fp") if returnfp else None,
        "entries": entries,
        "indexes": {
            "by_name": by_name,
            "by_type": by_type,
        },
        "counts": {"total": examined, "by_type": counts_by_type},
        "unknown_types": unknown_types,
    }
78067904
78077905
78087906def RePackCatFile (infile , outfile , fmttype = "auto" , compression = "auto" , compresswholefile = True , compressionlevel = None , compressionuselist = compressionlistalt , followlink = False , filestart = 0 , seekstart = 0 , seekend = 0 , checksumtype = ["crc32" , "crc32" , "crc32" , "crc32" ], skipchecksum = False , extradata = [], jsondata = {}, formatspecs = __file_format_dict__ , seektoend = False , verbose = False , returnfp = False ):
0 commit comments