diff options
26 files changed, 474 insertions, 129 deletions
@@ -1,33 +1,31 @@ GO ?= go +ifdef DTAIL_USE_ACL +GO_TAGS=linuxacl +endif +ifdef DTAIL_USE_PROPRIETARY +GO_TAGS+=proprietary +endif all: build build: dserver dcat dgrep dmap dtail dtailhealth dserver: -ifndef DTAIL_USE_ACL - ${GO} build ${GO_FLAGS} -o dserver ./cmd/dserver/main.go -else - ${GO} build ${GO_FLAGS} -tags linuxacl -o dserver ./cmd/dserver/main.go -endif + ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dserver ./cmd/dserver/main.go dcat: - ${GO} build ${GO_FLAGS} -o dcat ./cmd/dcat/main.go + ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dcat ./cmd/dcat/main.go dgrep: - ${GO} build ${GO_FLAGS} -o dgrep ./cmd/dgrep/main.go + ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dgrep ./cmd/dgrep/main.go dmap: - ${GO} build ${GO_FLAGS} -o dmap ./cmd/dmap/main.go + ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dmap ./cmd/dmap/main.go dtail: - ${GO} build ${GO_FLAGS} -o dtail ./cmd/dtail/main.go + ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dtail ./cmd/dtail/main.go dtailhealth: - ${GO} build ${GO_FLAGS} -o dtailhealth ./cmd/dtailhealth/main.go + ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dtailhealth ./cmd/dtailhealth/main.go install: -ifndef DTAIL_USE_ACL - ${GO} install ./cmd/dserver/main.go -else - ${GO} install -tags linuxacl ./cmd/dserver/main.go -endif - ${GO} install ./cmd/dcat/main.go - ${GO} install ./cmd/dgrep/main.go - ${GO} install ./cmd/dmap/main.go - ${GO} install ./cmd/dtail/main.go - ${GO} install ./cmd/dtailhealth/main.go + ${GO} install -tags '${GO_TAGS}' ./cmd/dserver/main.go + ${GO} install -tags '${GO_TAGS}' ./cmd/dcat/main.go + ${GO} install -tags '${GO_TAGS}' ./cmd/dgrep/main.go + ${GO} install -tags '${GO_TAGS}' ./cmd/dmap/main.go + ${GO} install -tags '${GO_TAGS}' ./cmd/dtail/main.go + ${GO} install -tags '${GO_TAGS}' ./cmd/dtailhealth/main.go clean: ls ./cmd/ | while read cmd; do \ test -f $$cmd && rm $$cmd; \ @@ -47,10 +45,5 @@ lint: done | grep -F .go: test: ${GO} clean -testcache -ifndef DTAIL_USE_ACL set -e; find . 
-name '*_test.go' | while read file; do dirname $$file; done | \ - sort -u | while read dir; do ${GO} test --race -v $$dir || exit 2; done -else - set -e;find . -name '*_test.go' | while read file; do dirname $$file; done | \ - sort -u | while read dir; do ${GO} test --tags linuxacl --race -v $$dir || exit 2; done -endif + sort -u | while read dir; do ${GO} test -tags '${GO_TAGS}' --race -v $$dir || exit 2; done diff --git a/doc/examples.md b/doc/examples.md index 26ce002..4937cc5 100644 --- a/doc/examples.md +++ b/doc/examples.md @@ -151,6 +151,25 @@ You can also use a file input pipe as follows: dmap 'from STATS select $hostname,max($goroutines),max($cgocalls),$loadavg,lifetimeConnections group by $hostname order by max($cgocalls)' ``` +### Aggregating CSV files + +In essence, this works exactly like aggregating logs. All files operated on must be valid CSV files and the first line of the CSV must be the header. E.g.: + +```shell +% cat example.csv +name,lastname,age,profession +Michael,Jordan,40,Basketball player +Michael,Jackson,100,Singer +Albert,Einstein,200,Physicist +% dmap --query 'select lastname,name where age > 40 logformat csv outfile result.csv' example.csv +% cat result.csv +lastname,name +Jackson,Michael +Einstein,Albert +``` + +DMap can also be used to query and aggregate CSV files from remote servers. + ### Other serverless commands The serverless mode works transparently with all other DTail commands.
Here are some examples: diff --git a/doc/logformats.md b/doc/logformats.md index c3f0c63..dbf2051 100644 --- a/doc/logformats.md +++ b/doc/logformats.md @@ -10,8 +10,10 @@ You could either make your application follow the DTail default log format, or y The following log formats are currently available out of the box: * `default` - The default DTail log format -* `generic` - A generic log format with a very simple set of fields +* `generic` - A generic log format with a simple set of fields * `generickv` - A simple log format expecting all log lines in form of `field1=value1|field2=value2|...` +* `csv` - A simple CSV format expecting all files to be comma-separated CSV files. The first line of the file must be the CSV header. +* `custom1` and `custom2` - Customizable log formats. ### Selecting a log format @@ -21,15 +23,26 @@ By default, DTail will use the `default` log format. You can override the log fo % dmap --files /var/log/example.log --query 'from EXAMPLE select ....queryhere.... logformat generickv' ``` -Alternatively, you can override the default log format with `MapreduceLogFormat` in the Server section of `dtail.json`. +You can override the default log format with `MapreduceLogFormat` in the Server section of `dtail.json`. ## Under the hood: generickv As an example, let's have a look at the `generickv` log format's implementation. It's located at `internal/mapr/logformat/generickv.go`: -```shell -// MakeFieldsGENERIGKV is the generic key-value logfile parser.
-func (p *Parser) MakeFieldsGENERIGKV(maprLine string) (map[string]string, error) { +```go +type genericKVParser struct { + defaultParser +} + +func newGenericKVParser(hostname, timeZoneName string, timeZoneOffset int) (*genericKVParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &genericKVParser{}, err + } + return &genericKVParser{defaultParser: *defaultParser}, nil +} + +func (p *genericKVParser) MakeFields(maprLine string) (map[string]string, error) { splitted := strings.Split(maprLine, protocol.FieldDelimiter) fields := make(map[string]string, len(splitted)) @@ -44,7 +57,7 @@ func (p *Parser) MakeFieldsGENERIGKV(maprLine string) (map[string]string, error) for _, kv := range splitted[0:] { keyAndValue := strings.SplitN(kv, "=", 2) if len(keyAndValue) != 2 { - // dlog.Common.Debug("Unable to parse key-value token, ignoring it", kv) + //dlog.Common.Debug("Unable to parse key-value token, ignoring it", kv) continue } fields[keyAndValue[0]] = keyAndValue[1] @@ -101,26 +114,42 @@ These variables may only exist in the DTail default log format (see `internal/ma * `$pid` - DTail server process ID * `$uptime` - DTail server uptime -## Implementing your own log format +## Implementing your own log format `Foo` -All what needs to be done is to place your own implementation into the `logformat` source directory. As a template, you can copy an existing format ... +What needs to be done is to place your own implementation into the `logformat` source directory. As a template, you can copy an existing format ... ```shell -% cp internal/mapr/logformat/generic.go internal/mapr/logformat/yourcustomformat.go +% cp internal/mapr/logformat/generic.go internal/mapr/logformat/foo.go ``` -... and replace `GENERIGKV` with your format's name in capital letters (the method name string is used by DTail to reflect the log format parser method, so it is important to name it correctly): +... 
and replace `generic` with your format's name `foo`: + +```go +package logformat + +type fooParser struct { + defaultParser +} + +func newFooParser(hostname, timeZoneName string, timeZoneOffset int) (*fooParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &fooParser{}, err + } + return &fooParser{defaultParser: *defaultParser}, nil +} + +func (p *fooParser) MakeFields(maprLine string) (map[string]string, error) { + fields := make(map[string]string, 3) + + .. + <YOUR CUSTOM CODE HERE> + .. -```shell -// MakeFieldsCUSTOMLOGFORMAT is your own custom log format. -func (p *Parser) MakeFieldsCUSTOMLOGFORMAT(maprLine string) (map[string]string, error) { - // .. Your own format implementation goes here - // .. you can parse maprLine and store values into the fields map. -.. -. -. return fields, nil } ``` -Once done, recompile DTail. DTail now understands `... logformat customlogformat` (see "Seleting a log format" above). +Next, `NewParser(...)` in `internal/mapr/logformat/parser.go` needs to be extended, so that the new log format is part of the switch statement. If you don't want to edit `parser.go` then you could instead use `custom1` or `custom2` log formats, there are ready templates available in the `logformat` package. + +Once done, recompile DTail. DTail now understands `... logformat foo` (see "Selecting a log format" above).
@@ -4,7 +4,10 @@ go 1.20 require ( github.com/DataDog/zstd v1.5.5 - golang.org/x/crypto v0.10.0 - golang.org/x/sys v0.9.0 // indirect - golang.org/x/term v0.9.0 // indirect + golang.org/x/crypto v0.13.0 +) + +require ( + golang.org/x/sys v0.12.0 // indirect + golang.org/x/term v0.12.0 // indirect ) @@ -1,8 +1,8 @@ -github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= -golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= -golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= -golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= -golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= -golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= +github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= diff --git a/integrationtests/dmap5.csv.expected b/integrationtests/dmap5.csv.expected new file mode 100644 index 0000000..1323a61 --- /dev/null +++ b/integrationtests/dmap5.csv.expected @@ -0,0 +1,2 @@ +sum($timecount),last($time),min($min_goroutines) +63.000000,1002-071143,12.000000 diff --git a/integrationtests/dmap5.csv.in b/integrationtests/dmap5.csv.in new file mode 100644 index 0000000..98c7763 --- /dev/null +++ b/integrationtests/dmap5.csv.in @@ -0,0 +1,4 @@ 
+count($time),$time,max($goroutines),avg($goroutines),min($goroutines) +23,1002-071147,16.000000,14.391304,12.000000 +20,1002-071213,17.000000,14.100000,12.000000 +20,1002-071143,17.000000,15.000000,13.000000 diff --git a/integrationtests/dmap5.csv.query.expected b/integrationtests/dmap5.csv.query.expected new file mode 100644 index 0000000..2d1723c --- /dev/null +++ b/integrationtests/dmap5.csv.query.expected @@ -0,0 +1 @@ +select sum($timecount),last($time),min($min_goroutines), group by $hostname set $timecount = `count($time)`, $time = `$time`, $min_goroutines = `min($goroutines)` logformat csv outfile dmap5.csv.tmp
\ No newline at end of file diff --git a/integrationtests/dmap_test.go b/integrationtests/dmap_test.go index a378fb5..f772243 100644 --- a/integrationtests/dmap_test.go +++ b/integrationtests/dmap_test.go @@ -245,3 +245,60 @@ func TestDMap4Append(t *testing.T) { os.Remove(csvFile) os.Remove(queryFile) } + +func TestDMap5CSV(t *testing.T) { + if !config.Env("DTAIL_INTEGRATION_TEST_RUN_MODE") { + t.Log("Skipping") + return + } + inFile := "dmap5.csv.in" + outFile := "dmap5.stdout.tmp" + csvFile := "dmap5.csv.tmp" + expectedCsvFile := "dmap5.csv.expected" + queryFile := fmt.Sprintf("%s.query", csvFile) + expectedQueryFile := "dmap5.csv.query.expected" + + // Delete in case it exists already. Otherwise, test will fail. + os.Remove(csvFile) + + query := fmt.Sprintf("select sum($timecount),last($time),min($min_goroutines),"+ + " group by $hostname"+ + " set $timecount = `count($time)`, $time = `$time`, $min_goroutines = `min($goroutines)`"+ + " logformat csv outfile %s", csvFile) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Run dmap command twice, it should append in the 2nd iteration the new results to the already existing + // file as we specified "outfile append". That works transparently for any mapreduce query + // (e.g. also for the dtail command in streaming mode). But it is easier to test with the dmap + // command. 
+ for i := 0; i < 2; i++ { + stdoutCh, stderrCh, cmdErrCh, err := startCommand(ctx, t, + "", "../dmap", + "--query", query, + "--cfg", "none", + "--logger", "stdout", + "--logLevel", "info", + "--noColor", inFile) + + if err != nil { + t.Error(err) + return + } + waitForCommand(ctx, t, stdoutCh, stderrCh, cmdErrCh) + } + + if err := compareFilesContents(t, csvFile, expectedCsvFile); err != nil { + t.Error(err) + return + } + if err := compareFiles(t, queryFile, expectedQueryFile); err != nil { + t.Error(err) + return + } + + os.Remove(outFile) + os.Remove(csvFile) + os.Remove(queryFile) +} diff --git a/internal/mapr/logformat/csv.go b/internal/mapr/logformat/csv.go new file mode 100644 index 0000000..ea85ca9 --- /dev/null +++ b/internal/mapr/logformat/csv.go @@ -0,0 +1,53 @@ +package logformat + +import ( + "fmt" + "strings" + + "github.com/mimecast/dtail/internal/protocol" +) + +type csvParser struct { + defaultParser + header []string + hasHeader bool +} + +func newCSVParser(hostname, timeZoneName string, timeZoneOffset int) (*csvParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &csvParser{}, err + } + return &csvParser{defaultParser: *defaultParser}, nil +} + +func (p *csvParser) MakeFields(maprLine string) (map[string]string, error) { + if !p.hasHeader { + p.parseHeader(maprLine) + return nil, ErrIgnoreFields + } + + fields := make(map[string]string, 7+len(p.header)) + fields["*"] = "*" + fields["$hostname"] = p.hostname + fields["$server"] = p.hostname + fields["$line"] = maprLine + fields["$empty"] = "" + fields["$timezone"] = p.timeZoneName + fields["$timeoffset"] = p.timeZoneOffset + + splitted := strings.Split(maprLine, protocol.CSVDelimiter) + for i, value := range splitted { + if i >= len(p.header) { + return fields, fmt.Errorf("CSV file seems corrupted, more fields than header values?") + } + fields[p.header[i]] = value + } + + return fields, nil +} + +func (p *csvParser) 
parseHeader(maprLine string) { + p.header = strings.Split(maprLine, protocol.CSVDelimiter) + p.hasHeader = true +} diff --git a/internal/mapr/logformat/csv_test.go b/internal/mapr/logformat/csv_test.go new file mode 100644 index 0000000..1baf032 --- /dev/null +++ b/internal/mapr/logformat/csv_test.go @@ -0,0 +1,54 @@ +package logformat + +import ( + "strings" + "testing" + + "github.com/mimecast/dtail/internal/protocol" +) + +func TestCSVLogFormat(t *testing.T) { + parser, err := NewParser("csv", nil) + if err != nil { + t.Errorf("Unable to create parser: %s", err.Error()) + } + + headers := []string{"name", "last_name", "color", "profession", "employee_number"} + dataLine1 := []string{"Paul", "Buetow", "Orange", "Site Reliability Engineer", "4242"} + dataLine2 := []string{"Peter", "Bauer", "Black", "CEO", "1"} + + inputs := []string{ + strings.Join(headers, protocol.CSVDelimiter), + strings.Join(dataLine1, protocol.CSVDelimiter), + strings.Join(dataLine2, protocol.CSVDelimiter), + } + + // First line is the header! 
+ if _, err := parser.MakeFields(inputs[0]); err != ErrIgnoreFields { + t.Errorf("Unable to parse the CSV header") + } + + // First data line + fields, err := parser.MakeFields(inputs[1]) + if err != nil { + t.Errorf("Unable to parse first CSV data line: %s", err.Error()) + } + if val := fields["name"]; val != "Paul" { + t.Errorf("Expected 'name' to be 'Paul' but got '%s'", val) + } + if val := fields["employee_number"]; val != "4242" { + t.Errorf("Expected 'employee_number' to be '4242' but got '%s'", val) + } + + // Second data line + fields, err = parser.MakeFields(inputs[2]) + if err != nil { + t.Errorf("Unable to parse second CSV data line: %s", err.Error()) + } + if val := fields["last_name"]; val != "Bauer" { + t.Errorf("Expected 'last_name' to be 'Bauer' but got '%s'", val) + } + if val := fields["color"]; val != "Black" { + t.Errorf("Expected 'color' to be 'Black' but got '%s'", val) + } +} diff --git a/internal/mapr/logformat/custom1.go b/internal/mapr/logformat/custom1.go new file mode 100644 index 0000000..7229f3e --- /dev/null +++ b/internal/mapr/logformat/custom1.go @@ -0,0 +1,16 @@ +package logformat + +import "errors" + +var ErrCustom1NotImplemented error = errors.New("custom1 log format is not implemented") + +// Template for creating a custom log format. +type custom1Parser struct{} + +func newCustom1Parser(hostname, timeZoneName string, timeZoneOffset int) (*custom1Parser, error) { + return &custom1Parser{}, ErrCustom1NotImplemented +} + +func (p *custom1Parser) MakeFields(maprLine string) (map[string]string, error) { + return nil, ErrCustom1NotImplemented +} diff --git a/internal/mapr/logformat/custom2.go b/internal/mapr/logformat/custom2.go new file mode 100644 index 0000000..262c721 --- /dev/null +++ b/internal/mapr/logformat/custom2.go @@ -0,0 +1,16 @@ +package logformat + +import "errors" + +var ErrCustom2NotImplemented error = errors.New("custom2 log format is not implemented") + +// Template for creating a custom log format.
+type custom2Parser struct{} + +func newCustom2Parser(hostname, timeZoneName string, timeZoneOffset int) (*custom2Parser, error) { + return &custom2Parser{}, ErrCustom2NotImplemented +} + +func (p *custom2Parser) MakeFields(maprLine string) (map[string]string, error) { + return nil, ErrCustom2NotImplemented +} diff --git a/internal/mapr/logformat/default.go b/internal/mapr/logformat/default.go index a44b49a..a499bc5 100644 --- a/internal/mapr/logformat/default.go +++ b/internal/mapr/logformat/default.go @@ -7,8 +7,21 @@ import ( "github.com/mimecast/dtail/internal/protocol" ) -// MakeFieldsDEFAULT is the default DTail log file key-value parser. -func (p *Parser) MakeFieldsDEFAULT(maprLine string) (map[string]string, error) { +type defaultParser struct { + hostname string + timeZoneName string + timeZoneOffset string +} + +func newDefaultParser(hostname, timeZoneName string, timeZoneOffset int) (*defaultParser, error) { + return &defaultParser{ + hostname: hostname, + timeZoneName: timeZoneName, + timeZoneOffset: fmt.Sprintf("%d", timeZoneOffset), + }, nil +} + +func (p *defaultParser) MakeFields(maprLine string) (map[string]string, error) { splitted := strings.Split(maprLine, protocol.FieldDelimiter) if len(splitted) < 11 || !strings.HasPrefix(splitted[9], "MAPREDUCE:") || diff --git a/internal/mapr/logformat/default_test.go b/internal/mapr/logformat/default_test.go index 28e1acc..4eae81b 100644 --- a/internal/mapr/logformat/default_test.go +++ b/internal/mapr/logformat/default_test.go @@ -87,6 +87,10 @@ func TestDefaultLogFormat(t *testing.T) { } fields, err := parser.MakeFields("foozoo=bar|bazbay") + if err != nil && err != ErrIgnoreFields { + t.Errorf(err.Error()) + } + if _, ok := fields["foo"]; ok { t.Errorf("Expected fiending field 'foo', but found it\n") } diff --git a/internal/mapr/logformat/generic.go b/internal/mapr/logformat/generic.go index 14ac2a9..32d9b4a 100644 --- a/internal/mapr/logformat/generic.go +++ b/internal/mapr/logformat/generic.go @@ -1,7 
+1,18 @@ package logformat -// MakeFieldsGENERIC is the generic log line parser. -func (p *Parser) MakeFieldsGENERIC(maprLine string) (map[string]string, error) { +type genericParser struct { + defaultParser +} + +func newGenericParser(hostname, timeZoneName string, timeZoneOffset int) (*genericParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &genericParser{}, err + } + return &genericParser{defaultParser: *defaultParser}, nil +} + +func (p *genericParser) MakeFields(maprLine string) (map[string]string, error) { fields := make(map[string]string, 3) fields["*"] = "*" diff --git a/internal/mapr/logformat/generickv.go b/internal/mapr/logformat/generickv.go index 3452e97..9c3de92 100644 --- a/internal/mapr/logformat/generickv.go +++ b/internal/mapr/logformat/generickv.go @@ -6,8 +6,19 @@ import ( "github.com/mimecast/dtail/internal/protocol" ) -// MakeFieldsGENERIGKV is the generic key-value logfile parser. -func (p *Parser) MakeFieldsGENERIGKV(maprLine string) (map[string]string, error) { +type genericKVParser struct { + defaultParser +} + +func newGenericKVParser(hostname, timeZoneName string, timeZoneOffset int) (*genericKVParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &genericKVParser{}, err + } + return &genericKVParser{defaultParser: *defaultParser}, nil +} + +func (p *genericKVParser) MakeFields(maprLine string) (map[string]string, error) { splitted := strings.Split(maprLine, protocol.FieldDelimiter) fields := make(map[string]string, len(splitted)) diff --git a/internal/mapr/logformat/mimecast.go b/internal/mapr/logformat/mimecast.go new file mode 100644 index 0000000..cf6b333 --- /dev/null +++ b/internal/mapr/logformat/mimecast.go @@ -0,0 +1,23 @@ +//go:build !proprietary +// +build !proprietary + +package logformat + +import "errors" + +// ErrMimecastNotAvailable is thrown in the open source version of DTail 
+var ErrMimecastNotAvailable error = errors.New("The mimecast logformat is not available in this build of DTail") + +type mimecastParser struct{} + +func newMimecastParser(hostname, timeZoneName string, timeZoneOffset int) (*mimecastParser, error) { + return &mimecastParser{}, ErrMimecastNotAvailable +} + +func newMimecastGenericParser(hostname, timeZoneName string, timeZoneOffset int) (*mimecastParser, error) { + return &mimecastParser{}, ErrMimecastNotAvailable +} + +func (p *mimecastParser) MakeFields(maprLine string) (map[string]string, error) { + return nil, ErrMimecastNotAvailable +} diff --git a/internal/mapr/logformat/parser.go b/internal/mapr/logformat/parser.go index d6aac8c..37d7a63 100644 --- a/internal/mapr/logformat/parser.go +++ b/internal/mapr/logformat/parser.go @@ -3,8 +3,6 @@ package logformat import ( "errors" "fmt" - "reflect" - "strings" "time" "github.com/mimecast/dtail/internal/config" @@ -15,62 +13,44 @@ import ( var ErrIgnoreFields error = errors.New("Ignore this field set") // Parser is used to parse the mapreduce information from the server log files. -type Parser struct { - hostname string - logFormatName string - makeFieldsFunc reflect.Value - makeFieldsReceiver reflect.Value - timeZoneName string - timeZoneOffset string +type Parser interface { + // MakeFields creates a field map from an input log line. + MakeFields(string) (map[string]string, error) } // NewParser returns a new log parser. 
-func NewParser(logFormatName string, query *mapr.Query) (*Parser, error) { +func NewParser(logFormatName string, query *mapr.Query) (Parser, error) { hostname, err := config.Hostname() if err != nil { return nil, err } now := time.Now() - zone, offset := now.Zone() - - p := Parser{ - hostname: hostname, - timeZoneName: zone, - timeZoneOffset: fmt.Sprintf("%d", offset), - } - - err = p.reflectLogFormat(logFormatName) - if err != nil { - return nil, err - } - return &p, nil -} - -// The aim of this is that everyone can plug in their own mapr log format -// parsing method to DTail. Just add a method MakeFieldsMODULENAME to type -// Parser. Whereas MODULENAME must be a upeprcase string. -func (p *Parser) reflectLogFormat(logFormatName string) error { - methodName := fmt.Sprintf("MakeFields%s", strings.ToUpper(logFormatName)) - rt := reflect.TypeOf(p) - method, ok := rt.MethodByName(methodName) - if !ok { - return errors.New("No such mapr log format module: " + methodName) - } - - p.makeFieldsFunc = method.Func - p.makeFieldsReceiver = reflect.ValueOf(p) - return nil -} - -// MakeFields is for returning the fields from a given log line. -func (p *Parser) MakeFields(maprLine string) (fields map[string]string, err error) { - inputValues := []reflect.Value{p.makeFieldsReceiver, reflect.ValueOf(maprLine)} - returnValues := p.makeFieldsFunc.Call(inputValues) - errInterface := returnValues[1].Interface() - if errInterface == nil { - fields, err = returnValues[0].Interface().(map[string]string), nil - return + timeZoneName, timeZoneOffset := now.Zone() + + // Extend this for adding more log formats! 
+ switch logFormatName { + case "generic": + return newGenericParser(hostname, timeZoneName, timeZoneOffset) + case "generickv": + return newGenericKVParser(hostname, timeZoneName, timeZoneOffset) + case "csv": + return newCSVParser(hostname, timeZoneName, timeZoneOffset) + case "mimecast": + return newMimecastParser(hostname, timeZoneName, timeZoneOffset) + case "mimecastgeneric": + return newMimecastGenericParser(hostname, timeZoneName, timeZoneOffset) + case "default": + return newDefaultParser(hostname, timeZoneName, timeZoneOffset) + case "custom1": + return newCustom1Parser(hostname, timeZoneName, timeZoneOffset) + case "custom2": + return newCustom2Parser(hostname, timeZoneName, timeZoneOffset) + default: + p, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return p, fmt.Errorf("No '%s' mapr log format and problem creating default one: %v", + logFormatName, err) + } + return p, fmt.Errorf("No '%s' mapr log format", logFormatName) } - fields, err = returnValues[0].Interface().(map[string]string), errInterface.(error) - return } diff --git a/internal/mapr/query.go b/internal/mapr/query.go index 247cdaf..ddcbc90 100644 --- a/internal/mapr/query.go +++ b/internal/mapr/query.go @@ -73,6 +73,13 @@ func NewQuery(queryStr string) (*Query, error) { Interval: time.Second * 5, Limit: -1, } + + // If log format is CSV, then use "." as the table. It means, that + // we don't do any file filtering, we process all lines of the CSV. + if q.LogFormat == "csv" { + q.Table = "." 
+ } + + return &q, q.parse(tokens) } @@ -87,8 +94,7 @@ func (q *Query) Has(what string) bool { } func (q *Query) parse(tokens []token) error { - tokens, err := q.parseTokens(tokens) - if err != nil { + if _, err := q.parseTokens(tokens); err != nil { return err } diff --git a/internal/mapr/query_test.go b/internal/mapr/query_test.go index f03ccba..f37b8d4 100644 --- a/internal/mapr/query_test.go +++ b/internal/mapr/query_test.go @@ -252,3 +252,43 @@ func TestParseQueryDeep(t *testing.T) { } } } + +func TestQuotedSelectCondition(t *testing.T) { + queryStr := "select `count($foo)`, foo, $foo, count($foo) logformat csv" + + q, err := NewQuery(queryStr) + if err != nil { + t.Errorf("Query parse error: %s\n%v: %v", queryStr, q, err) + } + if len(q.Select) != 4 { + t.Errorf("Expected four elements in 'select' clause but got '%v': %s\n%v", + q.Select, queryStr, q) + } + + if q.Select[0].Field != "count($foo)" { + t.Errorf("Expected 'count($foo)' as first element in 'select' clause but got '%v': %s\n%v", + q.Select[0].Field, queryStr, q) + } + if q.Select[0].Operation != Last { + t.Errorf("Expected 'Last' as aggregation function of first element in "+ + "'select' clause but got '%v': %s\n%v", q.Select[0].Operation, queryStr, q) + } + + if q.Select[1].Field != "foo" { + t.Errorf("Expected 'foo' as second element in 'select' clause but got '%v': %s\n%v", + q.Select[1].Field, queryStr, q) + } + if q.Select[2].Field != "$foo" { + t.Errorf("Expected '$foo' as third element in 'select' clause but got '%v': %s\n%v", + q.Select[2].Field, queryStr, q) + } + + if q.Select[3].Field != "$foo" { + t.Errorf("Expected '$foo' as fourth element in 'select' clause but got '%v': %s\n%v", + q.Select[3].Field, queryStr, q) + } + if q.Select[3].Operation != Count { + t.Errorf("Expected 'count' as aggregation function of fourth element in "+ + "'select' clause but got '%v': %s\n%v", q.Select[3].Operation, queryStr, q) + } +} diff --git a/internal/mapr/selectcondition.go
b/internal/mapr/selectcondition.go index 45fc16b..78359c7 100644 --- a/internal/mapr/selectcondition.go +++ b/internal/mapr/selectcondition.go @@ -40,16 +40,18 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) { // Parse select aggregation, e.g. sum(foo) parse := func(token token) (selectCondition, error) { var sc selectCondition - tokenStr := token.str - if !strings.Contains(tokenStr, "(") && !strings.Contains(tokenStr, ")") { - sc.Field = tokenStr - sc.FieldStorage = tokenStr + // With quotes stripped: We got a quoted select expression, e.g. + // "select `count($foo)` ...", which will literaly look for field + // "count($foo)" without performing the count aggregation. + if token.quotesStripped || (!strings.Contains(token.str, "(") && !strings.Contains(token.str, ")")) { + sc.Field = token.str + sc.FieldStorage = token.str sc.Operation = Last return sc, nil } - a := strings.Split(tokenStr, "(") + a := strings.Split(token.str, "(") if len(a) != 2 { return sc, errors.New(invalidQuery + "Can't parse 'select' aggregation: " + token.str) @@ -61,8 +63,8 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) { return sc, errors.New(invalidQuery + "Can't parse 'select' field name " + "from aggregation: " + token.str) } - sc.Field = b[0] // Field name, e.g. 'foo' - sc.FieldStorage = tokenStr // e.g. 'sum(foo)' + sc.Field = b[0] // Field name, e.g. 'foo' + sc.FieldStorage = token.str // e.g. 
'sum(foo)' switch agg { case "count": @@ -80,8 +82,7 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) { case "len": sc.Operation = Len default: - return sc, errors.New(invalidQuery + - "Unknown aggregation in 'select' clause: " + agg) + return sc, errors.New(invalidQuery + "Unknown aggregation in 'select' clause: " + agg) } return sc, nil } diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index ed32f8f..4f14751 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -27,7 +27,7 @@ type Aggregate struct { // The mapr query query *mapr.Query // The mapr log format parser - parser *logformat.Parser + parser logformat.Parser } // NewAggregate return a new server side aggregator. diff --git a/internal/mapr/setcondition.go b/internal/mapr/setcondition.go index 9dcd690..308a0f4 100644 --- a/internal/mapr/setcondition.go +++ b/internal/mapr/setcondition.go @@ -37,6 +37,13 @@ func makeSetConditions(tokens []token) (set []setCondition, err error) { return sc, nil, err } + // Seems like a quoted string? E.g.: "set $foo = `count(bar)`" + // So don't interpret `count` as a function! + if tokens[2].quotesStripped { + sc.rType = Field + return sc, tokens[3:], nil + } + // Seems like a function call? if strings.HasSuffix(sc.rString, ")") { functionStack, functionArg, err := funcs.NewFunctionStack(tokens[2].str) diff --git a/internal/mapr/token.go b/internal/mapr/token.go index 6ac7631..48d1192 100644 --- a/internal/mapr/token.go +++ b/internal/mapr/token.go @@ -9,8 +9,9 @@ var keywords = [...]string{"select", "from", "where", "set", "group", "rorder", // Represents a parsed token, used to parse the mapr query. 
type token struct { - str string - isBareword bool + str string + isBareword bool + quotesStripped bool } func (t token) isKeyword() bool { @@ -71,8 +72,9 @@ func tokensConsume(tokens []token) ([]token, []token) { stripped := t.str[1 : length-1] //dlog.Common.Trace("stripped", stripped) t := token{ - str: stripped, - isBareword: t.isBareword, + str: stripped, + isBareword: t.isBareword, + quotesStripped: true, } consumed = append(consumed, t) continue diff --git a/internal/version/version.go b/internal/version/version.go index 90073b5..15ea50f 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -13,7 +13,7 @@ const ( // Name of DTail. Name string = "DTail" // Version of DTail. - Version string = "4.2.0" + Version string = "4.3.0" // Additional information for DTail Additional string = "Have a lot of fun!" ) |
