From 08b64ad3b552c81d8e83d339b7a18066d4a7ff3c Mon Sep 17 00:00:00 2001 From: xeals Date: Wed, 2 May 2018 23:44:09 +1000 Subject: [PATCH] Detect MIME types on missing SQL entries Using magic number signatures to detect file types if the metadata entry is missing. Closes #10. --- Gopkg.lock | 17 ++++++++++++++++- Gopkg.toml | 4 ++++ cmd/extract.go | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index 2465a9d..06a7cd5 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -7,6 +7,12 @@ revision = "925541529c1fa6821df4e44ce2723319eb2be768" version = "v1.0.0" +[[projects]] + name = "github.com/h2non/filetype" + packages = ["."] + revision = "cc14fdc9ca0e4c2bafad7458f6ff79fd3947cfbb" + version = "v1.0.5" + [[projects]] name = "github.com/pkg/errors" packages = ["."] @@ -25,9 +31,18 @@ packages = ["hkdf"] revision = "b2aa35443fbc700ab74c586ae79b81c171851023" +[[projects]] + name = "gopkg.in/h2non/filetype.v1" + packages = [ + "matchers", + "types" + ] + revision = "cc14fdc9ca0e4c2bafad7458f6ff79fd3947cfbb" + version = "v1.0.5" + [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "749de642a68468ba1bc2cdf2b65bfd6626f79ae2f1468afac765668bd267ebf1" + inputs-digest = "652287263fb82ba7e3e67a8a0ae74053654804e59a30bc6ffdcd6258d7de8d4c" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 52716c7..227ad7a 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -44,3 +44,7 @@ [[constraint]] name = "github.com/urfave/cli" version = "1.20.0" + +[[constraint]] + name = "github.com/h2non/filetype" + version = "1.0.5" diff --git a/cmd/extract.go b/cmd/extract.go index c69b59a..0c4b12c 100644 --- a/cmd/extract.go +++ b/cmd/extract.go @@ -2,9 +2,11 @@ package cmd import ( "fmt" + "io/ioutil" "log" "os" + "github.com/h2non/filetype" "github.com/pkg/errors" "github.com/urfave/cli" "github.com/xeals/signal-back/types" @@ -71,12 +73,19 @@ func ExtractAttachments(bf *types.BackupFile) error { ps := f.GetStatement().GetParameters() if len(ps) == 25 { // Contains blob information aEncs[*ps[19].IntegerParameter] = *ps[3].StringParamter + log.Printf("found attachment metadata %v: `%v`\n", *ps[19].IntegerParameter, ps) } if a := f.GetAttachment(); a != nil { - ext := getExt(aEncs[*a.AttachmentId], *a.AttachmentId) - fileName := fmt.Sprintf("%v%s", *a.AttachmentId, ext) + log.Printf("found attachment binary %v\n\n", *a.AttachmentId) + id := *a.AttachmentId + + mime, hasMime := aEncs[id] + ext := getExt(mime, id) + + fileName := fmt.Sprintf("%v%s", id, ext) file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, os.ModePerm) + if err != nil { return errors.Wrap(err, "failed to open output file") } @@ -86,6 +95,22 @@ func ExtractAttachments(bf *types.BackupFile) error { if err = file.Close(); err != nil { return errors.Wrap(err, "failed to close output file") } + + if !hasMime { // Time to look into the file itself and guess. + buf, err := ioutil.ReadFile(fileName) + if err != nil { + return errors.Wrap(err, "failed to read output file for MIME detection") + } + kind, err := filetype.Match(buf) + if err != nil { + log.Printf("unable to detect file type: %s\n", err.Error()) + } + if err = os.Rename(fileName, fileName+"."+kind.Extension); err != nil { + log.Println("unknown file type") + return errors.Wrap(err, "unable to rename output file") + } + log.Println("found file type:", kind.MIME) + } } } } @@ -223,6 +248,10 @@ func getExt(mime string, file uint64) string { warnExt(file, "otf") return ".ttf" + case "": + log.Printf("file `%v` has no associated SQL entry; going to have to guess at its encoding", file) + return "" + default: log.Printf("encoding `%s` not recognised. create a PR or issue if you think it should be\n", mime) log.Printf("if you can provide details on the file `%v` as well, it would be appreciated", file)