From 2e1d20c1e7bb21038be4c51a3a06a0868511e7b7 Mon Sep 17 00:00:00 2001 From: DarthPestilane <553061376@qq.com> Date: Wed, 16 May 2018 16:45:34 +0800 Subject: [PATCH 1/2] add method `matchedType` and fix ms-office matchers' rules --- fixtures/sample.doc | Bin 0 -> 15360 bytes match.go | 15 +++++++++++++++ match_test.go | 23 +++++++++++++++++++++++ matchers/document.go | 11 +++-------- 4 files changed, 41 insertions(+), 8 deletions(-) create mode 100644 fixtures/sample.doc diff --git a/fixtures/sample.doc b/fixtures/sample.doc new file mode 100644 index 0000000000000000000000000000000000000000..45c0345065cf94c7b2eb5259622c3db96f60e914 GIT binary patch literal 15360 zcmeHOX>3$g6#m}Kw7e;mwopn%h*OwgsoD&Nr7A)zMF&(=EO9M#Y^!Z#+FGi_Emj1< z2wH=QCSu$Z6BARTCK_Xq9~y(8xPbb{U`+gF%%H}o(HXz*zW2&>0jHu;^quA`cR%mk zd(Ygh`#vl=`1H$Dzrqk$gdozHF!HnFEz0FYaTEimxN?S|sA)vWKAbof`1bi-xVb4D zV%JUFrB3vKG3T@MCwSozd6^jpUXsRxAkSgPM4vUKUzJ`2@od?P8_G)=@t#}eXuL)R zsiyLmk{6OkT(uXB19o$WTVZ87KD;dew2yb%pFw>lQX z&peHKrwdJnGKN~|$r8AQb@tG!T}HW^c%z|R^oJ2#z;-*BDrlkC`yulRxtRcUS(w?^5sDJ0^uH4*?_U`_wSPl(r2pNzivEt!3}RMUU;NKlfJ@Q2#QDS;;yJ{H z#9HF-udnMqv-_`p>6Upm=G&;TagL31Z7j60z{c4&uCmc!ZeA4mQH+aX(sMizBTj5daUD_>#hj>BJ1fkr6E*@x#1dj7aW%1t z7>ptj#hLZMCGQsSGv#uL*IH)qaH@W@mvCt)ZH%>fL@x_n?gQ3qONhA6D`Ooqmyw92ffI=C)ePpJq< z{j8kGPen*A>cKUgLA9bXUW<~yw#KJkCBFv;w_4a>=)Hz+G;|ZwfX$}d79-zp=mUoC zWL-h@sv_KD;h79JP`0qsDnSX}Hu1Ym%==bNGq8tp2zyQZ2Zru5^dmz*w!}U$^ixCk z8+yRd&n)SkvC6s9UQ987nV4}%=c8MVcH8jjBMcNFklIRK3_GPhFdt3Z79p65ne2qng|iqU83nKGOolqe250<4G>db;z=U zd_DO}4kT@BI!wtw%dwO#$-;jIS8xN6KY?pw+;ksG5>08GQL_2lUmJ8fM9S$W+ zM_WV0Oq_NeA9ZpArQ@VasMX+lkCdl1$JXMkcP(xu?V;Yy)ZDT2EsPNyJ$;6TpuL5i zypJYsvs7Zz##QY?$s<7s_l^|p##P7@5c=pgrsL$u^vG;Xjy2*gZnd106O)!lrbQ}I z8oL&Ez>XEf3S$#u#j#DZ%VTA+7co6HFTEgrcKY1(dFjRJy7ZFth3Tu)*QOiO*L$n* zQGIqVeZk)kS+giGvn1#GyOh(CL(36P=#I|u8~tp)1$+zm7Vs_LTfnz~Zvo!|z6E>> z_!jUj;9J1AfNufc0=@-&3yg6K=pv%iyn|ZG%ZMHx_TNU0ofGUiuLZIFvEd)!UbvWq zGTxMU|LtJT&WC3QdKc~g^5rKiys+)@@4o%SI+=ND-OO{cUQ(%OMQEdYd_Gb|Rp^@c z!TPuE=F*RIEn{ygv(T*7vGa`K9#U#-*P- zss{PuYIyxq*|OSIo#HAp4BYSb+BfNL`}Mr8=phnx-tD2u;u0?|n-8QHxb3TL!9YT` zFr(e zx!$$Kh~A;Q!UB(%Qg&=R=-)_8o?OY1^jEzHH$D0NFPF9zzxXiqKkbz-q>C;Sl_r#) zo^BNXmypNnIE{K;Mr!Ua^bdA2f>J)IAr`QPv1bsmLQrK-sbSggAVU? zdGSL|xXb0}bHYt7S2a${4jrgATk!m!%LnUOWbBxoeYEniw6A%fIgn;0@?qjW{C_Oq z^}pt%-o9ua>lu&UzVI0Y^wdIQLOxrsKu#j^T?XG&IPtne)eqAoqWro3Q%x^)rVv$b Ps>>w{e<%7db}aA*E0YDB literal 0 HcmV?d00001 diff --git a/match.go b/match.go index 9b6e376..ece105b 100644 --- a/match.go +++ b/match.go @@ -14,6 +14,21 @@ var Matchers = matchers.Matchers // NewMatcher is an alias to matchers.NewMatcher var NewMatcher = matchers.NewMatcher +// PossibleTypes returns the possible mime types of given bytes +func PossibleTypes(b []byte) ([]types.Type, error) { + if len(b) == 0 { + return nil, ErrEmptyBuffer + } + possibleTypes := []types.Type{} + for _, checker := range Matchers { + matchedType := checker(b) + if matchedType != types.Unknown && matchedType.Extension != "" { + possibleTypes = append(possibleTypes, matchedType) + } + } + return possibleTypes, nil +} + // Match infers the file type of a given buffer inspecting its magic numbers signature func Match(buf []byte) (types.Type, error) { length := len(buf) diff --git a/match_test.go b/match_test.go index 5dcbb5e..50f4585 100644 --- a/match_test.go +++ b/match_test.go @@ -185,3 +185,26 @@ func BenchmarkMatchPng(b *testing.B) { Match(pngBuffer) } } + +func TestPossibleTypes(t *testing.T) { + var docBuffer, _ = ioutil.ReadFile("./fixtures/sample.doc") + fileBytes := [][]byte{docBuffer} + for _, fileByte := range fileBytes { + pts, err := PossibleTypes(fileByte) + if err != nil { + t.Fail() + t.Error(err) + } + var success bool + for _, typ := range pts { + t.Logf("possible mime-type: %s, ext: %s", typ.MIME.Value, typ.Extension) + if typ.Extension == "doc" { + success = true + } + } + if !success { + t.Fail() + t.Error("matched failed") + } + } +} diff --git a/matchers/document.go b/matchers/document.go index cc5ded2..ffc2679 100644 --- a/matchers/document.go +++ b/matchers/document.go @@ -1,7 +1,5 @@ package matchers -import "bytes" - var ( TypeDoc = newType("doc", "application/msword") TypeDocx = newType("docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document") @@ -31,8 +29,7 @@ func Doc(buf []byte) bool { func Docx(buf []byte) bool { return len(buf) > 3 && buf[0] == 0x50 && buf[1] == 0x4B && - buf[2] == 0x03 && buf[3] == 0x04 && - bytes.Contains(buf[:256], []byte(TypeDocx.MIME.Value)) + buf[2] == 0x03 && buf[3] == 0x04 } func Xls(buf []byte) bool { @@ -46,8 +43,7 @@ func Xls(buf []byte) bool { func Xlsx(buf []byte) bool { return len(buf) > 3 && buf[0] == 0x50 && buf[1] == 0x4B && - buf[2] == 0x03 && buf[3] == 0x04 && - bytes.Contains(buf[:256], []byte(TypeXlsx.MIME.Value)) + buf[2] == 0x03 && buf[3] == 0x04 } func Ppt(buf []byte) bool { @@ -61,6 +57,5 @@ func Ppt(buf []byte) bool { func Pptx(buf []byte) bool { return len(buf) > 3 && buf[0] == 0x50 && buf[1] == 0x4B && - buf[2] == 0x07 && buf[3] == 0x08 && - bytes.Contains(buf[:256], []byte(TypePptx.MIME.Value)) + buf[2] == 0x07 && buf[3] == 0x08 } From 2773097f1d258e837e6d7de266e16f6f8ef170a7 Mon Sep 17 00:00:00 2001 From: DarthPestilane Date: Fri, 18 May 2018 09:58:26 +0800 Subject: [PATCH 2/2] nothing... --- match_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/match_test.go b/match_test.go index 50f4585..5e74b7f 100644 --- a/match_test.go +++ b/match_test.go @@ -187,7 +187,7 @@ func BenchmarkMatchPng(b *testing.B) { } func TestPossibleTypes(t *testing.T) { - var docBuffer, _ = ioutil.ReadFile("./fixtures/sample.doc") + docBuffer, _ := ioutil.ReadFile("./fixtures/sample.doc") fileBytes := [][]byte{docBuffer} for _, fileByte := range fileBytes { pts, err := PossibleTypes(fileByte)