summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile45
-rw-r--r--doc/examples.md19
-rw-r--r--doc/logformats.md67
-rw-r--r--go.mod9
-rw-r--r--go.sum14
-rw-r--r--integrationtests/dmap5.csv.expected2
-rw-r--r--integrationtests/dmap5.csv.in4
-rw-r--r--integrationtests/dmap5.csv.query.expected1
-rw-r--r--integrationtests/dmap_test.go57
-rw-r--r--internal/mapr/logformat/csv.go53
-rw-r--r--internal/mapr/logformat/csv_test.go54
-rw-r--r--internal/mapr/logformat/custom1.go16
-rw-r--r--internal/mapr/logformat/custom2.go16
-rw-r--r--internal/mapr/logformat/default.go17
-rw-r--r--internal/mapr/logformat/default_test.go4
-rw-r--r--internal/mapr/logformat/generic.go15
-rw-r--r--internal/mapr/logformat/generickv.go15
-rw-r--r--internal/mapr/logformat/mimecast.go23
-rw-r--r--internal/mapr/logformat/parser.go82
-rw-r--r--internal/mapr/query.go10
-rw-r--r--internal/mapr/query_test.go40
-rw-r--r--internal/mapr/selectcondition.go19
-rw-r--r--internal/mapr/server/aggregate.go2
-rw-r--r--internal/mapr/setcondition.go7
-rw-r--r--internal/mapr/token.go10
-rw-r--r--internal/version/version.go2
26 files changed, 474 insertions, 129 deletions
diff --git a/Makefile b/Makefile
index d764051..717d701 100644
--- a/Makefile
+++ b/Makefile
@@ -1,33 +1,31 @@
GO ?= go
+ifdef DTAIL_USE_ACL
+GO_TAGS=linuxacl
+endif
+ifdef DTAIL_USE_PROPRIETARY
+GO_TAGS+=proprietary
+endif
all: build
build: dserver dcat dgrep dmap dtail dtailhealth
dserver:
-ifndef DTAIL_USE_ACL
- ${GO} build ${GO_FLAGS} -o dserver ./cmd/dserver/main.go
-else
- ${GO} build ${GO_FLAGS} -tags linuxacl -o dserver ./cmd/dserver/main.go
-endif
+ ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dserver ./cmd/dserver/main.go
dcat:
- ${GO} build ${GO_FLAGS} -o dcat ./cmd/dcat/main.go
+ ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dcat ./cmd/dcat/main.go
dgrep:
- ${GO} build ${GO_FLAGS} -o dgrep ./cmd/dgrep/main.go
+ ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dgrep ./cmd/dgrep/main.go
dmap:
- ${GO} build ${GO_FLAGS} -o dmap ./cmd/dmap/main.go
+ ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dmap ./cmd/dmap/main.go
dtail:
- ${GO} build ${GO_FLAGS} -o dtail ./cmd/dtail/main.go
+ ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dtail ./cmd/dtail/main.go
dtailhealth:
- ${GO} build ${GO_FLAGS} -o dtailhealth ./cmd/dtailhealth/main.go
+ ${GO} build ${GO_FLAGS} -tags '${GO_TAGS}' -o dtailhealth ./cmd/dtailhealth/main.go
install:
-ifndef DTAIL_USE_ACL
- ${GO} install ./cmd/dserver/main.go
-else
- ${GO} install -tags linuxacl ./cmd/dserver/main.go
-endif
- ${GO} install ./cmd/dcat/main.go
- ${GO} install ./cmd/dgrep/main.go
- ${GO} install ./cmd/dmap/main.go
- ${GO} install ./cmd/dtail/main.go
- ${GO} install ./cmd/dtailhealth/main.go
+ ${GO} install -tags '${GO_TAGS}' ./cmd/dserver/main.go
+ ${GO} install -tags '${GO_TAGS}' ./cmd/dcat/main.go
+ ${GO} install -tags '${GO_TAGS}' ./cmd/dgrep/main.go
+ ${GO} install -tags '${GO_TAGS}' ./cmd/dmap/main.go
+ ${GO} install -tags '${GO_TAGS}' ./cmd/dtail/main.go
+ ${GO} install -tags '${GO_TAGS}' ./cmd/dtailhealth/main.go
clean:
ls ./cmd/ | while read cmd; do \
test -f $$cmd && rm $$cmd; \
@@ -47,10 +45,5 @@ lint:
done | grep -F .go:
test:
${GO} clean -testcache
-ifndef DTAIL_USE_ACL
set -e; find . -name '*_test.go' | while read file; do dirname $$file; done | \
- sort -u | while read dir; do ${GO} test --race -v $$dir || exit 2; done
-else
- set -e;find . -name '*_test.go' | while read file; do dirname $$file; done | \
- sort -u | while read dir; do ${GO} test --tags linuxacl --race -v $$dir || exit 2; done
-endif
+ sort -u | while read dir; do ${GO} test -tags '${GO_TAGS}' --race -v $$dir || exit 2; done
diff --git a/doc/examples.md b/doc/examples.md
index 26ce002..4937cc5 100644
--- a/doc/examples.md
+++ b/doc/examples.md
@@ -151,6 +151,25 @@ You can also use a file input pipe as follows:
dmap 'from STATS select $hostname,max($goroutines),max($cgocalls),$loadavg,lifetimeConnections group by $hostname order by max($cgocalls)'
```
+### Aggregating CSV files
+
+In essence, this works exactly like aggregating logs. All files operated on must be valid CSV files and the first line of the CSV must be the header. E.g.:
+
+```shell
+% cat example.csv
+name,lastname,age,profession
+Michael,Jordan,40,Basketball player
+Michael,Jackson,100,Singer
+Albert,Einstein,200,Physicist
+% dmap --query 'select lastname,name where age > 40 logformat csv outfile result.csv' example.csv
+% cat result.csv
+lastname,name
+Jackson,Michael
+Einstein,Albert
+```
+
+DMap can also be used to query and aggregate CSV files from remote servers.
+
### Other serverless commands
The serverless mode works transparently with all other DTail commands. Here are some examples:
diff --git a/doc/logformats.md b/doc/logformats.md
index c3f0c63..dbf2051 100644
--- a/doc/logformats.md
+++ b/doc/logformats.md
@@ -10,8 +10,10 @@ You could either make your application follow the DTail default log format, or y
The following log formats are currently available out of the box:
* `default` - The default DTail log format
-* `generic` - A generic log format with a very simple set of fields
+* `generic` - A generic log format with a simple set of fields
* `generickv` - A simple log format expecting all log lines in form of `field1=value1|field2=value2|...`
+* `csv` - A simple CSV format expecting all files to be comma separated CSV files. The first line of each file must be the CSV header.
+* `custom1` and `custom2` - Customizable log formats.
### Selecting a log format
@@ -21,15 +23,26 @@ By default, DTail will use the `default` log format. You can override the log fo
% dmap --files /var/log/example.log --query 'from EXAMPLE select ....queryhere.... logformat generickv'
```
-Alternatively, you can override the default log format with `MapreduceLogFormat` in the Server section of `dtail.json`.
+You can override the default log format with `MapreduceLogFormat` in the Server section of `dtail.json`.
## Under the hood: generickv
As an example, let's have a look at the `generickv` log format's implementation. It's located at `internal/mapr/logformat/generickv.go`:
-```shell
-// MakeFieldsGENERIGKV is the generic key-value logfile parser.
-func (p *Parser) MakeFieldsGENERIGKV(maprLine string) (map[string]string, error) {
+```go
+type genericKVParser struct {
+ defaultParser
+}
+
+func newGenericKVParser(hostname, timeZoneName string, timeZoneOffset int) (*genericKVParser, error) {
+ defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset)
+ if err != nil {
+ return &genericKVParser{}, err
+ }
+ return &genericKVParser{defaultParser: *defaultParser}, nil
+}
+
+func (p *genericKVParser) MakeFields(maprLine string) (map[string]string, error) {
splitted := strings.Split(maprLine, protocol.FieldDelimiter)
fields := make(map[string]string, len(splitted))
@@ -44,7 +57,7 @@ func (p *Parser) MakeFieldsGENERIGKV(maprLine string) (map[string]string, error)
for _, kv := range splitted[0:] {
keyAndValue := strings.SplitN(kv, "=", 2)
if len(keyAndValue) != 2 {
- // dlog.Common.Debug("Unable to parse key-value token, ignoring it", kv)
+ //dlog.Common.Debug("Unable to parse key-value token, ignoring it", kv)
continue
}
fields[keyAndValue[0]] = keyAndValue[1]
@@ -101,26 +114,42 @@ These variables may only exist in the DTail default log format (see `internal/ma
* `$pid` - DTail server process ID
* `$uptime` - DTail server uptime
-## Implementing your own log format
+## Implementing your own log format `Foo`
-All what needs to be done is to place your own implementation into the `logformat` source directory. As a template, you can copy an existing format ...
+What needs to be done is to place your own implementation into the `logformat` source directory. As a template, you can copy an existing format ...
```shell
-% cp internal/mapr/logformat/generic.go internal/mapr/logformat/yourcustomformat.go
+% cp internal/mapr/logformat/generic.go internal/mapr/logformat/foo.go
```
-... and replace `GENERIGKV` with your format's name in capital letters (the method name string is used by DTail to reflect the log format parser method, so it is important to name it correctly):
+... and replace `generic` with your format's name `foo`:
+
+```go
+package logformat
+
+type fooParser struct {
+ defaultParser
+}
+
+func newFooParser(hostname, timeZoneName string, timeZoneOffset int) (*fooParser, error) {
+ defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset)
+ if err != nil {
+ return &fooParser{}, err
+ }
+ return &fooParser{defaultParser: *defaultParser}, nil
+}
+
+func (p *fooParser) MakeFields(maprLine string) (map[string]string, error) {
+ fields := make(map[string]string, 3)
+
+ ..
+ <YOUR CUSTOM CODE HERE>
+ ..
-```shell
-// MakeFieldsCUSTOMLOGFORMAT is your own custom log format.
-func (p *Parser) MakeFieldsCUSTOMLOGFORMAT(maprLine string) (map[string]string, error) {
- // .. Your own format implementation goes here
- // .. you can parse maprLine and store values into the fields map.
-..
-.
-.
return fields, nil
}
```
-Once done, recompile DTail. DTail now understands `... logformat customlogformat` (see "Seleting a log format" above).
+Next, `NewParser(...)` in `internal/mapr/logformat/parser.go` needs to be extended, so that the new log format is part of the switch statement. If you don't want to edit `parser.go` then you could instead use the `custom1` or `custom2` log formats; ready-made templates for them are available in the `logformat` package.
+
+Once done, recompile DTail. DTail now understands `... logformat foo` (see "Selecting a log format" above).
diff --git a/go.mod b/go.mod
index fce2a35..edef243 100644
--- a/go.mod
+++ b/go.mod
@@ -4,7 +4,10 @@ go 1.20
require (
github.com/DataDog/zstd v1.5.5
- golang.org/x/crypto v0.10.0
- golang.org/x/sys v0.9.0 // indirect
- golang.org/x/term v0.9.0 // indirect
+ golang.org/x/crypto v0.13.0
+)
+
+require (
+ golang.org/x/sys v0.12.0 // indirect
+ golang.org/x/term v0.12.0 // indirect
)
diff --git a/go.sum b/go.sum
index 90a6d4b..f0d2329 100644
--- a/go.sum
+++ b/go.sum
@@ -1,8 +1,8 @@
-github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ=
-golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
-golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
-golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
-golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo=
-golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28=
+github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
+golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
+golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
diff --git a/integrationtests/dmap5.csv.expected b/integrationtests/dmap5.csv.expected
new file mode 100644
index 0000000..1323a61
--- /dev/null
+++ b/integrationtests/dmap5.csv.expected
@@ -0,0 +1,2 @@
+sum($timecount),last($time),min($min_goroutines)
+63.000000,1002-071143,12.000000
diff --git a/integrationtests/dmap5.csv.in b/integrationtests/dmap5.csv.in
new file mode 100644
index 0000000..98c7763
--- /dev/null
+++ b/integrationtests/dmap5.csv.in
@@ -0,0 +1,4 @@
+count($time),$time,max($goroutines),avg($goroutines),min($goroutines)
+23,1002-071147,16.000000,14.391304,12.000000
+20,1002-071213,17.000000,14.100000,12.000000
+20,1002-071143,17.000000,15.000000,13.000000
diff --git a/integrationtests/dmap5.csv.query.expected b/integrationtests/dmap5.csv.query.expected
new file mode 100644
index 0000000..2d1723c
--- /dev/null
+++ b/integrationtests/dmap5.csv.query.expected
@@ -0,0 +1 @@
+select sum($timecount),last($time),min($min_goroutines), group by $hostname set $timecount = `count($time)`, $time = `$time`, $min_goroutines = `min($goroutines)` logformat csv outfile dmap5.csv.tmp \ No newline at end of file
diff --git a/integrationtests/dmap_test.go b/integrationtests/dmap_test.go
index a378fb5..f772243 100644
--- a/integrationtests/dmap_test.go
+++ b/integrationtests/dmap_test.go
@@ -245,3 +245,60 @@ func TestDMap4Append(t *testing.T) {
os.Remove(csvFile)
os.Remove(queryFile)
}
+
+func TestDMap5CSV(t *testing.T) {
+ if !config.Env("DTAIL_INTEGRATION_TEST_RUN_MODE") {
+ t.Log("Skipping")
+ return
+ }
+ inFile := "dmap5.csv.in"
+ outFile := "dmap5.stdout.tmp"
+ csvFile := "dmap5.csv.tmp"
+ expectedCsvFile := "dmap5.csv.expected"
+ queryFile := fmt.Sprintf("%s.query", csvFile)
+ expectedQueryFile := "dmap5.csv.query.expected"
+
+ // Delete in case it exists already. Otherwise, test will fail.
+ os.Remove(csvFile)
+
+ query := fmt.Sprintf("select sum($timecount),last($time),min($min_goroutines),"+
+ " group by $hostname"+
+ " set $timecount = `count($time)`, $time = `$time`, $min_goroutines = `min($goroutines)`"+
+ " logformat csv outfile %s", csvFile)
+
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+	// Run the dmap command twice to verify that repeated runs against the
+	// same outfile still produce the expected results for the CSV log format.
+	// NOTE(review): unlike TestDMap4Append, this query does not use "outfile
+	// append", so the second run is expected to reproduce the same contents.
+ for i := 0; i < 2; i++ {
+ stdoutCh, stderrCh, cmdErrCh, err := startCommand(ctx, t,
+ "", "../dmap",
+ "--query", query,
+ "--cfg", "none",
+ "--logger", "stdout",
+ "--logLevel", "info",
+ "--noColor", inFile)
+
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ waitForCommand(ctx, t, stdoutCh, stderrCh, cmdErrCh)
+ }
+
+ if err := compareFilesContents(t, csvFile, expectedCsvFile); err != nil {
+ t.Error(err)
+ return
+ }
+ if err := compareFiles(t, queryFile, expectedQueryFile); err != nil {
+ t.Error(err)
+ return
+ }
+
+ os.Remove(outFile)
+ os.Remove(csvFile)
+ os.Remove(queryFile)
+}
diff --git a/internal/mapr/logformat/csv.go b/internal/mapr/logformat/csv.go
new file mode 100644
index 0000000..ea85ca9
--- /dev/null
+++ b/internal/mapr/logformat/csv.go
@@ -0,0 +1,53 @@
+package logformat
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/mimecast/dtail/internal/protocol"
+)
+
+type csvParser struct {
+ defaultParser
+ header []string
+ hasHeader bool
+}
+
+func newCSVParser(hostname, timeZoneName string, timeZoneOffset int) (*csvParser, error) {
+ defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset)
+ if err != nil {
+ return &csvParser{}, err
+ }
+ return &csvParser{defaultParser: *defaultParser}, nil
+}
+
+func (p *csvParser) MakeFields(maprLine string) (map[string]string, error) {
+ if !p.hasHeader {
+ p.parseHeader(maprLine)
+ return nil, ErrIgnoreFields
+ }
+
+ fields := make(map[string]string, 7+len(p.header))
+ fields["*"] = "*"
+ fields["$hostname"] = p.hostname
+ fields["$server"] = p.hostname
+ fields["$line"] = maprLine
+ fields["$empty"] = ""
+ fields["$timezone"] = p.timeZoneName
+ fields["$timeoffset"] = p.timeZoneOffset
+
+ splitted := strings.Split(maprLine, protocol.CSVDelimiter)
+ for i, value := range splitted {
+ if i >= len(p.header) {
+ return fields, fmt.Errorf("CSV file seems corrupted, more fields than header values?")
+ }
+ fields[p.header[i]] = value
+ }
+
+ return fields, nil
+}
+
+func (p *csvParser) parseHeader(maprLine string) {
+ p.header = strings.Split(maprLine, protocol.CSVDelimiter)
+ p.hasHeader = true
+}
diff --git a/internal/mapr/logformat/csv_test.go b/internal/mapr/logformat/csv_test.go
new file mode 100644
index 0000000..1baf032
--- /dev/null
+++ b/internal/mapr/logformat/csv_test.go
@@ -0,0 +1,54 @@
+package logformat
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/mimecast/dtail/internal/protocol"
+)
+
+func TestCSVLogFormat(t *testing.T) {
+ parser, err := NewParser("csv", nil)
+ if err != nil {
+ t.Errorf("Unable to create parser: %s", err.Error())
+ }
+
+ headers := []string{"name", "last_name", "color", "profession", "employee_number"}
+ dataLine1 := []string{"Paul", "Buetow", "Orange", "Site Reliability Engineer", "4242"}
+ dataLine2 := []string{"Peter", "Bauer", "Black", "CEO", "1"}
+
+ inputs := []string{
+ strings.Join(headers, protocol.CSVDelimiter),
+ strings.Join(dataLine1, protocol.CSVDelimiter),
+ strings.Join(dataLine2, protocol.CSVDelimiter),
+ }
+
+ // First line is the header!
+ if _, err := parser.MakeFields(inputs[0]); err != ErrIgnoreFields {
+ t.Errorf("Unable to parse the CSV header")
+ }
+
+ // First data line
+ fields, err := parser.MakeFields(inputs[1])
+ if err != nil {
+ t.Errorf("Unable to parse first CSV data line: %s", err.Error())
+ }
+ if val := fields["name"]; val != "Paul" {
+ t.Errorf("Expected 'name' to be 'Paul' but got '%s'", val)
+ }
+ if val := fields["employee_number"]; val != "4242" {
+ t.Errorf("Expected 'employee_number' to be '4242' but got '%s'", val)
+ }
+
+ // Second data line
+ fields, err = parser.MakeFields(inputs[2])
+ if err != nil {
+ t.Errorf("Unable to parse first CSV data line: %s", err.Error())
+ }
+ if val := fields["last_name"]; val != "Bauer" {
+ t.Errorf("Expected 'last_name' to be 'Bauer' but got '%s'", val)
+ }
+ if val := fields["color"]; val != "Black" {
+ t.Errorf("Expected 'color' to be 'Black' but got '%s'", val)
+ }
+}
diff --git a/internal/mapr/logformat/custom1.go b/internal/mapr/logformat/custom1.go
new file mode 100644
index 0000000..7229f3e
--- /dev/null
+++ b/internal/mapr/logformat/custom1.go
@@ -0,0 +1,16 @@
+package logformat
+
+import "errors"
+
+var ErrCustom1NotImplemented error = errors.New("custom1 log format is not implemented")
+
+// Template for creating a custom log format.
+type custom1Parser struct{}
+
+func newCustom1Parser(hostname, timeZoneName string, timeZoneOffset int) (*custom1Parser, error) {
+ return &custom1Parser{}, ErrCustom1NotImplemented
+}
+
+func (p *custom1Parser) MakeFields(maprLine string) (map[string]string, error) {
+ return nil, ErrCustom1NotImplemented
+}
diff --git a/internal/mapr/logformat/custom2.go b/internal/mapr/logformat/custom2.go
new file mode 100644
index 0000000..262c721
--- /dev/null
+++ b/internal/mapr/logformat/custom2.go
@@ -0,0 +1,16 @@
+package logformat
+
+import "errors"
+
+var ErrCustom2NotImplemented error = errors.New("custom2 log format is not implemented")
+
+// Template for creating a custom log format.
+type custom2Parser struct{}
+
+func newCustom2Parser(hostname, timeZoneName string, timeZoneOffset int) (*custom2Parser, error) {
+ return &custom2Parser{}, ErrCustom2NotImplemented
+}
+
+func (p *custom2Parser) MakeFields(maprLine string) (map[string]string, error) {
+ return nil, ErrCustom2NotImplemented
+}
diff --git a/internal/mapr/logformat/default.go b/internal/mapr/logformat/default.go
index a44b49a..a499bc5 100644
--- a/internal/mapr/logformat/default.go
+++ b/internal/mapr/logformat/default.go
@@ -7,8 +7,21 @@ import (
"github.com/mimecast/dtail/internal/protocol"
)
-// MakeFieldsDEFAULT is the default DTail log file key-value parser.
-func (p *Parser) MakeFieldsDEFAULT(maprLine string) (map[string]string, error) {
+type defaultParser struct {
+ hostname string
+ timeZoneName string
+ timeZoneOffset string
+}
+
+func newDefaultParser(hostname, timeZoneName string, timeZoneOffset int) (*defaultParser, error) {
+ return &defaultParser{
+ hostname: hostname,
+ timeZoneName: timeZoneName,
+ timeZoneOffset: fmt.Sprintf("%d", timeZoneOffset),
+ }, nil
+}
+
+func (p *defaultParser) MakeFields(maprLine string) (map[string]string, error) {
splitted := strings.Split(maprLine, protocol.FieldDelimiter)
if len(splitted) < 11 || !strings.HasPrefix(splitted[9], "MAPREDUCE:") ||
diff --git a/internal/mapr/logformat/default_test.go b/internal/mapr/logformat/default_test.go
index 28e1acc..4eae81b 100644
--- a/internal/mapr/logformat/default_test.go
+++ b/internal/mapr/logformat/default_test.go
@@ -87,6 +87,10 @@ func TestDefaultLogFormat(t *testing.T) {
}
fields, err := parser.MakeFields("foozoo=bar|bazbay")
+ if err != nil && err != ErrIgnoreFields {
+ t.Errorf(err.Error())
+ }
+
if _, ok := fields["foo"]; ok {
t.Errorf("Expected fiending field 'foo', but found it\n")
}
diff --git a/internal/mapr/logformat/generic.go b/internal/mapr/logformat/generic.go
index 14ac2a9..32d9b4a 100644
--- a/internal/mapr/logformat/generic.go
+++ b/internal/mapr/logformat/generic.go
@@ -1,7 +1,18 @@
package logformat
-// MakeFieldsGENERIC is the generic log line parser.
-func (p *Parser) MakeFieldsGENERIC(maprLine string) (map[string]string, error) {
+type genericParser struct {
+ defaultParser
+}
+
+func newGenericParser(hostname, timeZoneName string, timeZoneOffset int) (*genericParser, error) {
+ defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset)
+ if err != nil {
+ return &genericParser{}, err
+ }
+ return &genericParser{defaultParser: *defaultParser}, nil
+}
+
+func (p *genericParser) MakeFields(maprLine string) (map[string]string, error) {
fields := make(map[string]string, 3)
fields["*"] = "*"
diff --git a/internal/mapr/logformat/generickv.go b/internal/mapr/logformat/generickv.go
index 3452e97..9c3de92 100644
--- a/internal/mapr/logformat/generickv.go
+++ b/internal/mapr/logformat/generickv.go
@@ -6,8 +6,19 @@ import (
"github.com/mimecast/dtail/internal/protocol"
)
-// MakeFieldsGENERIGKV is the generic key-value logfile parser.
-func (p *Parser) MakeFieldsGENERIGKV(maprLine string) (map[string]string, error) {
+type genericKVParser struct {
+ defaultParser
+}
+
+func newGenericKVParser(hostname, timeZoneName string, timeZoneOffset int) (*genericKVParser, error) {
+ defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset)
+ if err != nil {
+ return &genericKVParser{}, err
+ }
+ return &genericKVParser{defaultParser: *defaultParser}, nil
+}
+
+func (p *genericKVParser) MakeFields(maprLine string) (map[string]string, error) {
splitted := strings.Split(maprLine, protocol.FieldDelimiter)
fields := make(map[string]string, len(splitted))
diff --git a/internal/mapr/logformat/mimecast.go b/internal/mapr/logformat/mimecast.go
new file mode 100644
index 0000000..cf6b333
--- /dev/null
+++ b/internal/mapr/logformat/mimecast.go
@@ -0,0 +1,23 @@
+//go:build !proprietary
+// +build !proprietary
+
+package logformat
+
+import "errors"
+
+// ErrMimecastNotAvailable is thrown in the open source version of DTail
+var ErrMimecastNotAvailable error = errors.New("The mimecast logformat is not available in this build of DTail")
+
+type mimecastParser struct{}
+
+func newMimecastParser(hostname, timeZoneName string, timeZoneOffset int) (*mimecastParser, error) {
+ return &mimecastParser{}, ErrMimecastNotAvailable
+}
+
+func newMimecastGenericParser(hostname, timeZoneName string, timeZoneOffset int) (*mimecastParser, error) {
+ return &mimecastParser{}, ErrMimecastNotAvailable
+}
+
+func (p *mimecastParser) MakeFields(maprLine string) (map[string]string, error) {
+ return nil, ErrMimecastNotAvailable
+}
diff --git a/internal/mapr/logformat/parser.go b/internal/mapr/logformat/parser.go
index d6aac8c..37d7a63 100644
--- a/internal/mapr/logformat/parser.go
+++ b/internal/mapr/logformat/parser.go
@@ -3,8 +3,6 @@ package logformat
import (
"errors"
"fmt"
- "reflect"
- "strings"
"time"
"github.com/mimecast/dtail/internal/config"
@@ -15,62 +13,44 @@ import (
var ErrIgnoreFields error = errors.New("Ignore this field set")
// Parser is used to parse the mapreduce information from the server log files.
-type Parser struct {
- hostname string
- logFormatName string
- makeFieldsFunc reflect.Value
- makeFieldsReceiver reflect.Value
- timeZoneName string
- timeZoneOffset string
+type Parser interface {
+ // MakeFields creates a field map from an input log line.
+ MakeFields(string) (map[string]string, error)
}
// NewParser returns a new log parser.
-func NewParser(logFormatName string, query *mapr.Query) (*Parser, error) {
+func NewParser(logFormatName string, query *mapr.Query) (Parser, error) {
hostname, err := config.Hostname()
if err != nil {
return nil, err
}
now := time.Now()
- zone, offset := now.Zone()
-
- p := Parser{
- hostname: hostname,
- timeZoneName: zone,
- timeZoneOffset: fmt.Sprintf("%d", offset),
- }
-
- err = p.reflectLogFormat(logFormatName)
- if err != nil {
- return nil, err
- }
- return &p, nil
-}
-
-// The aim of this is that everyone can plug in their own mapr log format
-// parsing method to DTail. Just add a method MakeFieldsMODULENAME to type
-// Parser. Whereas MODULENAME must be a upeprcase string.
-func (p *Parser) reflectLogFormat(logFormatName string) error {
- methodName := fmt.Sprintf("MakeFields%s", strings.ToUpper(logFormatName))
- rt := reflect.TypeOf(p)
- method, ok := rt.MethodByName(methodName)
- if !ok {
- return errors.New("No such mapr log format module: " + methodName)
- }
-
- p.makeFieldsFunc = method.Func
- p.makeFieldsReceiver = reflect.ValueOf(p)
- return nil
-}
-
-// MakeFields is for returning the fields from a given log line.
-func (p *Parser) MakeFields(maprLine string) (fields map[string]string, err error) {
- inputValues := []reflect.Value{p.makeFieldsReceiver, reflect.ValueOf(maprLine)}
- returnValues := p.makeFieldsFunc.Call(inputValues)
- errInterface := returnValues[1].Interface()
- if errInterface == nil {
- fields, err = returnValues[0].Interface().(map[string]string), nil
- return
+ timeZoneName, timeZoneOffset := now.Zone()
+
+ // Extend this for adding more log formats!
+ switch logFormatName {
+ case "generic":
+ return newGenericParser(hostname, timeZoneName, timeZoneOffset)
+ case "generickv":
+ return newGenericKVParser(hostname, timeZoneName, timeZoneOffset)
+ case "csv":
+ return newCSVParser(hostname, timeZoneName, timeZoneOffset)
+ case "mimecast":
+ return newMimecastParser(hostname, timeZoneName, timeZoneOffset)
+ case "mimecastgeneric":
+ return newMimecastGenericParser(hostname, timeZoneName, timeZoneOffset)
+ case "default":
+ return newDefaultParser(hostname, timeZoneName, timeZoneOffset)
+ case "custom1":
+ return newCustom1Parser(hostname, timeZoneName, timeZoneOffset)
+ case "custom2":
+ return newCustom2Parser(hostname, timeZoneName, timeZoneOffset)
+ default:
+ p, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset)
+ if err != nil {
+ return p, fmt.Errorf("No '%s' mapr log format and problem creating default one: %v",
+ logFormatName, err)
+ }
+ return p, fmt.Errorf("No '%s' mapr log format", logFormatName)
}
- fields, err = returnValues[0].Interface().(map[string]string), errInterface.(error)
- return
}
diff --git a/internal/mapr/query.go b/internal/mapr/query.go
index 247cdaf..ddcbc90 100644
--- a/internal/mapr/query.go
+++ b/internal/mapr/query.go
@@ -73,6 +73,13 @@ func NewQuery(queryStr string) (*Query, error) {
Interval: time.Second * 5,
Limit: -1,
}
+
+ // If log format is CSV, then use "." as the table. It means, that
+ // we don't do any file filtering, we process all lines of the CSV.
+ if q.LogFormat == "csv" {
+ q.Table = "."
+ }
+
return &q, q.parse(tokens)
}
@@ -87,8 +94,7 @@ func (q *Query) Has(what string) bool {
}
func (q *Query) parse(tokens []token) error {
- tokens, err := q.parseTokens(tokens)
- if err != nil {
+ if _, err := q.parseTokens(tokens); err != nil {
return err
}
diff --git a/internal/mapr/query_test.go b/internal/mapr/query_test.go
index f03ccba..f37b8d4 100644
--- a/internal/mapr/query_test.go
+++ b/internal/mapr/query_test.go
@@ -252,3 +252,43 @@ func TestParseQueryDeep(t *testing.T) {
}
}
}
+
+func TestQuotedSelectCondition(t *testing.T) {
+ queryStr := "select `count($foo)`, foo, $foo, count($foo) logformat csv"
+
+ q, err := NewQuery(queryStr)
+ if err != nil {
+ t.Errorf("Query parse error: %s\n%v: %v", queryStr, q, err)
+ }
+ if len(q.Select) != 4 {
+ t.Errorf("Expected three elements in 'select' clause but got '%v': %s\n%v",
+ q.Select, queryStr, q)
+ }
+
+ if q.Select[0].Field != "count($foo)" {
+ t.Errorf("Expected 'num($foo)' as first element in 'select' clause but got '%v': %s\n%v",
+ q.Select[0].Field, queryStr, q)
+ }
+ if q.Select[0].Operation != Last {
+ t.Errorf("Expected 'Last' as aggregation function of first element in "+
+ "'select' clause but got '%v': %s\n%v", q.Select[0].Operation, queryStr, q)
+ }
+
+ if q.Select[1].Field != "foo" {
+ t.Errorf("Expected 'foo' as first element in 'select' clause but got '%v': %s\n%v",
+ q.Select[1].Field, queryStr, q)
+ }
+ if q.Select[2].Field != "$foo" {
+ t.Errorf("Expected '$foo' as first element in 'select' clause but got '%v': %s\n%v",
+ q.Select[2].Field, queryStr, q)
+ }
+
+ if q.Select[3].Field != "$foo" {
+ t.Errorf("Expected '$foo' as first element in 'select' clause but got '%v': %s\n%v",
+ q.Select[3].Field, queryStr, q)
+ }
+ if q.Select[3].Operation != Count {
+ t.Errorf("Expected 'count' as aggregation function of thourth element in "+
+ "'select' clause but got '%v': %s\n%v", q.Select[3].Operation, queryStr, q)
+ }
+}
diff --git a/internal/mapr/selectcondition.go b/internal/mapr/selectcondition.go
index 45fc16b..78359c7 100644
--- a/internal/mapr/selectcondition.go
+++ b/internal/mapr/selectcondition.go
@@ -40,16 +40,18 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) {
// Parse select aggregation, e.g. sum(foo)
parse := func(token token) (selectCondition, error) {
var sc selectCondition
- tokenStr := token.str
- if !strings.Contains(tokenStr, "(") && !strings.Contains(tokenStr, ")") {
- sc.Field = tokenStr
- sc.FieldStorage = tokenStr
+ // With quotes stripped: We got a quoted select expression, e.g.
+ // "select `count($foo)` ...", which will literaly look for field
+ // "count($foo)" without performing the count aggregation.
+ if token.quotesStripped || (!strings.Contains(token.str, "(") && !strings.Contains(token.str, ")")) {
+ sc.Field = token.str
+ sc.FieldStorage = token.str
sc.Operation = Last
return sc, nil
}
- a := strings.Split(tokenStr, "(")
+ a := strings.Split(token.str, "(")
if len(a) != 2 {
return sc, errors.New(invalidQuery + "Can't parse 'select' aggregation: " +
token.str)
@@ -61,8 +63,8 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) {
return sc, errors.New(invalidQuery + "Can't parse 'select' field name " +
"from aggregation: " + token.str)
}
- sc.Field = b[0] // Field name, e.g. 'foo'
- sc.FieldStorage = tokenStr // e.g. 'sum(foo)'
+ sc.Field = b[0] // Field name, e.g. 'foo'
+ sc.FieldStorage = token.str // e.g. 'sum(foo)'
switch agg {
case "count":
@@ -80,8 +82,7 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) {
case "len":
sc.Operation = Len
default:
- return sc, errors.New(invalidQuery +
- "Unknown aggregation in 'select' clause: " + agg)
+ return sc, errors.New(invalidQuery + "Unknown aggregation in 'select' clause: " + agg)
}
return sc, nil
}
diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go
index ed32f8f..4f14751 100644
--- a/internal/mapr/server/aggregate.go
+++ b/internal/mapr/server/aggregate.go
@@ -27,7 +27,7 @@ type Aggregate struct {
// The mapr query
query *mapr.Query
// The mapr log format parser
- parser *logformat.Parser
+ parser logformat.Parser
}
// NewAggregate return a new server side aggregator.
diff --git a/internal/mapr/setcondition.go b/internal/mapr/setcondition.go
index 9dcd690..308a0f4 100644
--- a/internal/mapr/setcondition.go
+++ b/internal/mapr/setcondition.go
@@ -37,6 +37,13 @@ func makeSetConditions(tokens []token) (set []setCondition, err error) {
return sc, nil, err
}
+ // Seems like a quoted string? E.g.: "set $foo = `count(bar)`"
+ // So don't interpret `count` as a function!
+ if tokens[2].quotesStripped {
+ sc.rType = Field
+ return sc, tokens[3:], nil
+ }
+
// Seems like a function call?
if strings.HasSuffix(sc.rString, ")") {
functionStack, functionArg, err := funcs.NewFunctionStack(tokens[2].str)
diff --git a/internal/mapr/token.go b/internal/mapr/token.go
index 6ac7631..48d1192 100644
--- a/internal/mapr/token.go
+++ b/internal/mapr/token.go
@@ -9,8 +9,9 @@ var keywords = [...]string{"select", "from", "where", "set", "group", "rorder",
// Represents a parsed token, used to parse the mapr query.
type token struct {
- str string
- isBareword bool
+ str string
+ isBareword bool
+ quotesStripped bool
}
func (t token) isKeyword() bool {
@@ -71,8 +72,9 @@ func tokensConsume(tokens []token) ([]token, []token) {
stripped := t.str[1 : length-1]
//dlog.Common.Trace("stripped", stripped)
t := token{
- str: stripped,
- isBareword: t.isBareword,
+ str: stripped,
+ isBareword: t.isBareword,
+ quotesStripped: true,
}
consumed = append(consumed, t)
continue
diff --git a/internal/version/version.go b/internal/version/version.go
index 90073b5..15ea50f 100644
--- a/internal/version/version.go
+++ b/internal/version/version.go
@@ -13,7 +13,7 @@ const (
// Name of DTail.
Name string = "DTail"
// Version of DTail.
- Version string = "4.2.0"
+ Version string = "4.3.0"
// Additional information for DTail
Additional string = "Have a lot of fun!"
)