From 9519beb28ecc7fa9fc54dba51a95ba4f5d7293a3 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Tue, 2 Mar 2021 11:22:29 -0500 Subject: [PATCH] Remove the _ml/find-file-structure docs --- .../apis/find-file-structure.asciidoc | 1955 ----------------- .../ml/anomaly-detection/apis/index.asciidoc | 2 - .../anomaly-detection/apis/ml-apis.asciidoc | 6 - 3 files changed, 1963 deletions(-) delete mode 100644 docs/reference/ml/anomaly-detection/apis/find-file-structure.asciidoc diff --git a/docs/reference/ml/anomaly-detection/apis/find-file-structure.asciidoc b/docs/reference/ml/anomaly-detection/apis/find-file-structure.asciidoc deleted file mode 100644 index 49119526fd3ad..0000000000000 --- a/docs/reference/ml/anomaly-detection/apis/find-file-structure.asciidoc +++ /dev/null @@ -1,1955 +0,0 @@ -[role="xpack"] -[testenv="basic"] -[[ml-find-file-structure]] -= Find file structure API -++++ -Find file structure -++++ - -deprecated::[7.12.0,Replaced by <>] - -Finds the structure of a text file. The text file must contain data that is -suitable to be ingested into {es}. - - -[[ml-find-file-structure-request]] -== {api-request-title} - -`POST _ml/find_file_structure` - - -[[ml-find-file-structure-prereqs]] -== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `monitor_ml` or -`monitor` cluster privileges to use this API. See -<> and -{ml-docs-setup-privileges}. - - -[[ml-find-file-structure-desc]] -== {api-description-title} - -This API provides a starting point for ingesting data into {es} in a format that -is suitable for subsequent use with other {ml} functionality. - -Unlike other {es} endpoints, the data that is posted to this endpoint does not -need to be UTF-8 encoded and in JSON format. It must, however, be text; binary -file formats are not currently supported. - -The response from the API contains: - -* A couple of messages from the beginning of the file. -* Statistics that reveal the most common values for all fields detected within - the file and basic numeric statistics for numeric fields. -* Information about the structure of the file, which is useful when you write - ingest configurations to index the file contents. -* Appropriate mappings for an {es} index, which you could use to ingest the file - contents. - -All this information can be calculated by the structure finder with no guidance. -However, you can optionally override some of the decisions about the file -structure by specifying one or more query parameters. - -Details of the output can be seen in the -<>. - -If the structure finder produces unexpected results for a particular file, -specify the `explain` query parameter. It causes an `explanation` to appear in -the response, which should help in determining why the returned structure was -chosen. - - -[[ml-find-file-structure-query-parms]] -== {api-query-parms-title} - -`charset`:: - (Optional, string) The file's character set. It must be a character set that - is supported by the JVM that {es} uses. For example, `UTF-8`, `UTF-16LE`, - `windows-1252`, or `EUC-JP`. If this parameter is not specified, the structure - finder chooses an appropriate character set. - -`column_names`:: - (Optional, string) If you have set `format` to `delimited`, you can specify - the column names in a comma-separated list. If this parameter is not specified, - the structure finder uses the column names from the header row of the file. If - the file does not have a header role, columns are named "column1", "column2", - "column3", etc. - -`delimiter`:: - (Optional, string) If you have set `format` to `delimited`, you can specify - the character used to delimit the values in each row. Only a single character - is supported; the delimiter cannot have multiple characters. By default, the - API considers the following possibilities: comma, tab, semi-colon, and pipe - (`|`). In this default scenario, all rows must have the same number of fields - for the delimited format to be detected. If you specify a delimiter, up to 10% - of the rows can have a different number of columns than the first row. - -`explain`:: - (Optional, Boolean) If this parameter is set to `true`, the response includes - a field named `explanation`, which is an array of strings that indicate how - the structure finder produced its result. The default value is `false`. - -`format`:: -(Optional, string) The high level structure of the file. Valid values are -`ndjson`, `xml`, `delimited`, and `semi_structured_text`. By default, the -API chooses the format. In this default scenario, all rows must -have the same number of fields for a delimited format to be detected. If the -`format` is set to `delimited` and the `delimiter` is not set, however, the -API tolerates up to 5% of rows that have a different number of -columns than the first row. - -`grok_pattern`:: - (Optional, string) If you have set `format` to `semi_structured_text`, you can - specify a Grok pattern that is used to extract fields from every message in - the file. The name of the timestamp field in the Grok pattern must match what - is specified in the `timestamp_field` parameter. If that parameter is not - specified, the name of the timestamp field in the Grok pattern must match - "timestamp". If `grok_pattern` is not specified, the structure finder creates - a Grok pattern. - -`has_header_row`:: - (Optional, Boolean) If you have set `format` to `delimited`, you can use this - parameter to indicate whether the column names are in the first row of the - file. If this parameter is not specified, the structure finder guesses based - on the similarity of the first row of the file to other rows. - -`line_merge_size_limit`:: - (Optional, unsigned integer) The maximum number of characters in a message - when lines are merged to form messages while analyzing semi-structured files. - The default is `10000`. If you have extremely long messages you may need to - increase this, but be aware that this may lead to very long processing times - if the way to group lines into messages is misdetected. - -`lines_to_sample`:: - (Optional, unsigned integer) The number of lines to include in the structural - analysis, starting from the beginning of the file. The minimum is 2; the - default is `1000`. If the value of this parameter is greater than the number - of lines in the file, the analysis proceeds (as long as there are at least two - lines in the file) for all of the lines. + -+ --- -NOTE: The number of lines and the variation of the lines affects the speed of -the analysis. For example, if you upload a log file where the first 1000 lines -are all variations on the same message, the analysis will find more commonality -than would be seen with a bigger sample. If possible, however, it is more -efficient to upload a sample file with more variety in the first 1000 lines than -to request analysis of 100000 lines to achieve some variety. --- - -`quote`:: - (Optional, string) If you have set `format` to `delimited`, you can specify - the character used to quote the values in each row if they contain newlines or - the delimiter character. Only a single character is supported. If this - parameter is not specified, the default value is a double quote (`"`). If your - delimited file format does not use quoting, a workaround is to set this - argument to a character that does not appear anywhere in the sample. - -`should_trim_fields`:: - (Optional, Boolean) If you have set `format` to `delimited`, you can specify - whether values between delimiters should have whitespace trimmed from them. If - this parameter is not specified and the delimiter is pipe (`|`), the default - value is `true`. Otherwise, the default value is `false`. - -`timeout`:: - (Optional, <>) Sets the maximum amount of time that the - structure analysis make take. If the analysis is still running when the - timeout expires then it will be aborted. The default value is 25 seconds. - -`timestamp_field`:: - (Optional, string) The name of the field that contains the primary timestamp - of each record in the file. In particular, if the file were ingested into an - index, this is the field that would be used to populate the `@timestamp` field. -+ --- -If the `format` is `semi_structured_text`, this field must match the name of the -appropriate extraction in the `grok_pattern`. Therefore, for semi-structured -file formats, it is best not to specify this parameter unless `grok_pattern` is -also specified. - -For structured file formats, if you specify this parameter, the field must exist -within the file. - -If this parameter is not specified, the structure finder makes a decision about -which field (if any) is the primary timestamp field. For structured file -formats, it is not compulsory to have a timestamp in the file. --- - -`timestamp_format`:: - (Optional, string) The Java time format of the timestamp field in the file. -+ --- -Only a subset of Java time format letter groups are supported: - -* `a` -* `d` -* `dd` -* `EEE` -* `EEEE` -* `H` -* `HH` -* `h` -* `M` -* `MM` -* `MMM` -* `MMMM` -* `mm` -* `ss` -* `XX` -* `XXX` -* `yy` -* `yyyy` -* `zzz` - -Additionally `S` letter groups (fractional seconds) of length one to nine are -supported providing they occur after `ss` and separated from the `ss` by a `.`, -`,` or `:`. Spacing and punctuation is also permitted with the exception of `?`, -newline and carriage return, together with literal text enclosed in single -quotes. For example, `MM/dd HH.mm.ss,SSSSSS 'in' yyyy` is a valid override -format. - -One valuable use case for this parameter is when the format is semi-structured -text, there are multiple timestamp formats in the file, and you know which -format corresponds to the primary timestamp, but you do not want to specify the -full `grok_pattern`. Another is when the timestamp format is one that the -structure finder does not consider by default. - -If this parameter is not specified, the structure finder chooses the best -format from a built-in set. - -The following table provides the appropriate `timeformat` values for some example timestamps: - -|=== -| Timeformat | Presentation - -| yyyy-MM-dd HH:mm:ssZ | 2019-04-20 13:15:22+0000 -| EEE, d MMM yyyy HH:mm:ss Z | Sat, 20 Apr 2019 13:15:22 +0000 -| dd.MM.yy HH:mm:ss.SSS | 20.04.19 13:15:22.285 -|=== - -See -https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html[the Java date/time format documentation] -for more information about date and time format syntax. - --- - -[[ml-find-file-structure-request-body]] -== {api-request-body-title} - -The text file that you want to analyze. It must contain data that is suitable to -be ingested into {es}. It does not need to be in JSON format and it does not -need to be UTF-8 encoded. The size is limited to the {es} HTTP receive buffer -size, which defaults to 100 Mb. - -[[ml-find-file-structure-examples]] -== {api-examples-title} - -[[ml-find-file-structure-example-nld-json]] -=== Ingesting newline-delimited JSON - -Suppose you have a newline-delimited JSON file that contains information about -some books. You can send the contents to the `find_file_structure` endpoint: - -[source,console] ----- -POST _ml/find_file_structure -{"name": "Leviathan Wakes", "author": "James S.A. Corey", "release_date": "2011-06-02", "page_count": 561} -{"name": "Hyperion", "author": "Dan Simmons", "release_date": "1989-05-26", "page_count": 482} -{"name": "Dune", "author": "Frank Herbert", "release_date": "1965-06-01", "page_count": 604} -{"name": "Dune Messiah", "author": "Frank Herbert", "release_date": "1969-10-15", "page_count": 331} -{"name": "Children of Dune", "author": "Frank Herbert", "release_date": "1976-04-21", "page_count": 408} -{"name": "God Emperor of Dune", "author": "Frank Herbert", "release_date": "1981-05-28", "page_count": 454} -{"name": "Consider Phlebas", "author": "Iain M. Banks", "release_date": "1987-04-23", "page_count": 471} -{"name": "Pandora's Star", "author": "Peter F. Hamilton", "release_date": "2004-03-02", "page_count": 768} -{"name": "Revelation Space", "author": "Alastair Reynolds", "release_date": "2000-03-15", "page_count": 585} -{"name": "A Fire Upon the Deep", "author": "Vernor Vinge", "release_date": "1992-06-01", "page_count": 613} -{"name": "Ender's Game", "author": "Orson Scott Card", "release_date": "1985-06-01", "page_count": 324} -{"name": "1984", "author": "George Orwell", "release_date": "1985-06-01", "page_count": 328} -{"name": "Fahrenheit 451", "author": "Ray Bradbury", "release_date": "1953-10-15", "page_count": 227} -{"name": "Brave New World", "author": "Aldous Huxley", "release_date": "1932-06-01", "page_count": 268} -{"name": "Foundation", "author": "Isaac Asimov", "release_date": "1951-06-01", "page_count": 224} -{"name": "The Giver", "author": "Lois Lowry", "release_date": "1993-04-26", "page_count": 208} -{"name": "Slaughterhouse-Five", "author": "Kurt Vonnegut", "release_date": "1969-06-01", "page_count": 275} -{"name": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "release_date": "1979-10-12", "page_count": 180} -{"name": "Snow Crash", "author": "Neal Stephenson", "release_date": "1992-06-01", "page_count": 470} -{"name": "Neuromancer", "author": "William Gibson", "release_date": "1984-07-01", "page_count": 271} -{"name": "The Handmaid's Tale", "author": "Margaret Atwood", "release_date": "1985-06-01", "page_count": 311} -{"name": "Starship Troopers", "author": "Robert A. Heinlein", "release_date": "1959-12-01", "page_count": 335} -{"name": "The Left Hand of Darkness", "author": "Ursula K. Le Guin", "release_date": "1969-06-01", "page_count": 304} -{"name": "The Moon is a Harsh Mistress", "author": "Robert A. Heinlein", "release_date": "1966-04-01", "page_count": 288} ----- -// TEST[warning:[POST /_ml/find_file_structure] is deprecated! Use [POST /_text_structure/find_structure] instead.] - -If the request does not encounter errors, you receive the following result: - -[source,console-result] ----- -{ - "num_lines_analyzed" : 24, <1> - "num_messages_analyzed" : 24, <2> - "sample_start" : "{\"name\": \"Leviathan Wakes\", \"author\": \"James S.A. Corey\", \"release_date\": \"2011-06-02\", \"page_count\": 561}\n{\"name\": \"Hyperion\", \"author\": \"Dan Simmons\", \"release_date\": \"1989-05-26\", \"page_count\": 482}\n", <3> - "charset" : "UTF-8", <4> - "has_byte_order_marker" : false, <5> - "format" : "ndjson", <6> - "timestamp_field" : "release_date", <7> - "joda_timestamp_formats" : [ <8> - "ISO8601" - ], - "java_timestamp_formats" : [ <9> - "ISO8601" - ], - "need_client_timezone" : true, <10> - "mappings" : { <11> - "properties" : { - "@timestamp" : { - "type" : "date" - }, - "author" : { - "type" : "keyword" - }, - "name" : { - "type" : "keyword" - }, - "page_count" : { - "type" : "long" - }, - "release_date" : { - "type" : "date", - "format" : "iso8601" - } - } - }, - "ingest_pipeline" : { - "description" : "Ingest pipeline created by text structure finder", - "processors" : [ - { - "date" : { - "field" : "release_date", - "timezone" : "{{ event.timezone }}", - "formats" : [ - "ISO8601" - ] - } - } - ] - }, - "field_stats" : { <12> - "author" : { - "count" : 24, - "cardinality" : 20, - "top_hits" : [ - { - "value" : "Frank Herbert", - "count" : 4 - }, - { - "value" : "Robert A. Heinlein", - "count" : 2 - }, - { - "value" : "Alastair Reynolds", - "count" : 1 - }, - { - "value" : "Aldous Huxley", - "count" : 1 - }, - { - "value" : "Dan Simmons", - "count" : 1 - }, - { - "value" : "Douglas Adams", - "count" : 1 - }, - { - "value" : "George Orwell", - "count" : 1 - }, - { - "value" : "Iain M. Banks", - "count" : 1 - }, - { - "value" : "Isaac Asimov", - "count" : 1 - }, - { - "value" : "James S.A. Corey", - "count" : 1 - } - ] - }, - "name" : { - "count" : 24, - "cardinality" : 24, - "top_hits" : [ - { - "value" : "1984", - "count" : 1 - }, - { - "value" : "A Fire Upon the Deep", - "count" : 1 - }, - { - "value" : "Brave New World", - "count" : 1 - }, - { - "value" : "Children of Dune", - "count" : 1 - }, - { - "value" : "Consider Phlebas", - "count" : 1 - }, - { - "value" : "Dune", - "count" : 1 - }, - { - "value" : "Dune Messiah", - "count" : 1 - }, - { - "value" : "Ender's Game", - "count" : 1 - }, - { - "value" : "Fahrenheit 451", - "count" : 1 - }, - { - "value" : "Foundation", - "count" : 1 - } - ] - }, - "page_count" : { - "count" : 24, - "cardinality" : 24, - "min_value" : 180, - "max_value" : 768, - "mean_value" : 387.0833333333333, - "median_value" : 329.5, - "top_hits" : [ - { - "value" : 180, - "count" : 1 - }, - { - "value" : 208, - "count" : 1 - }, - { - "value" : 224, - "count" : 1 - }, - { - "value" : 227, - "count" : 1 - }, - { - "value" : 268, - "count" : 1 - }, - { - "value" : 271, - "count" : 1 - }, - { - "value" : 275, - "count" : 1 - }, - { - "value" : 288, - "count" : 1 - }, - { - "value" : 304, - "count" : 1 - }, - { - "value" : 311, - "count" : 1 - } - ] - }, - "release_date" : { - "count" : 24, - "cardinality" : 20, - "earliest" : "1932-06-01", - "latest" : "2011-06-02", - "top_hits" : [ - { - "value" : "1985-06-01", - "count" : 3 - }, - { - "value" : "1969-06-01", - "count" : 2 - }, - { - "value" : "1992-06-01", - "count" : 2 - }, - { - "value" : "1932-06-01", - "count" : 1 - }, - { - "value" : "1951-06-01", - "count" : 1 - }, - { - "value" : "1953-10-15", - "count" : 1 - }, - { - "value" : "1959-12-01", - "count" : 1 - }, - { - "value" : "1965-06-01", - "count" : 1 - }, - { - "value" : "1966-04-01", - "count" : 1 - }, - { - "value" : "1969-10-15", - "count" : 1 - } - ] - } - } -} ----- -// TESTRESPONSE[s/"sample_start" : ".*",/"sample_start" : "$body.sample_start",/] -// The substitution is because the "file" is pre-processed by the test harness, -// so the fields may get reordered in the JSON the endpoint sees - -<1> `num_lines_analyzed` indicates how many lines of the file were analyzed. -<2> `num_messages_analyzed` indicates how many distinct messages the lines contained. - For NDJSON, this value is the same as `num_lines_analyzed`. For other file - formats, messages can span several lines. -<3> `sample_start` reproduces the first two messages in the file verbatim. This - may help to diagnose parse errors or accidental uploads of the wrong file. -<4> `charset` indicates the character encoding used to parse the file. -<5> For UTF character encodings, `has_byte_order_marker` indicates whether the - file begins with a byte order marker. -<6> `format` is one of `ndjson`, `xml`, `delimited` or `semi_structured_text`. -<7> The `timestamp_field` names the field considered most likely to be the - primary timestamp of each document. -<8> `joda_timestamp_formats` are used to tell Logstash how to parse timestamps. -<9> `java_timestamp_formats` are the Java time formats recognized in the time - fields. Elasticsearch mappings and Ingest pipeline use this format. -<10> If a timestamp format is detected that does not include a timezone, - `need_client_timezone` will be `true`. The server that parses the file must - therefore be told the correct timezone by the client. -<11> `mappings` contains some suitable mappings for an index into which the data - could be ingested. In this case, the `release_date` field has been given a - `keyword` type as it is not considered specific enough to convert to the - `date` type. -<12> `field_stats` contains the most common values of each field, plus basic - numeric statistics for the numeric `page_count` field. This information - may provide clues that the data needs to be cleaned or transformed prior - to use by other {ml} functionality. - - -[[ml-find-file-structure-example-nyc]] -=== Finding the structure of NYC yellow cab example data - -The next example shows how it's possible to find the structure of some New York -City yellow cab trip data. The first `curl` command downloads the data, the -first 20000 lines of which are then piped into the `find_file_structure` -endpoint. The `lines_to_sample` query parameter of the endpoint is set to 20000 -to match what is specified in the `head` command. - -[source,js] ----- -curl -s "s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2018-06.csv" | head -20000 | curl -s -H "Content-Type: application/json" -XPOST "localhost:9200/_ml/find_file_structure?pretty&lines_to_sample=20000" -T - ----- -// NOTCONSOLE -// Not converting to console because this shows how curl can be used - --- -NOTE: The `Content-Type: application/json` header must be set even though in -this case the data is not JSON. (Alternatively the `Content-Type` can be set -to any other supported by {es}, but it must be set.) --- - -If the request does not encounter errors, you receive the following result: - -[source,js] ----- -{ - "num_lines_analyzed" : 20000, - "num_messages_analyzed" : 19998, <1> - "sample_start" : "VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID,store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount\n\n1,2018-06-01 00:15:40,2018-06-01 00:16:46,1,.00,1,N,145,145,2,3,0.5,0.5,0,0,0.3,4.3\n", - "charset" : "UTF-8", - "has_byte_order_marker" : false, - "format" : "delimited", <2> - "multiline_start_pattern" : "^.*?,\"?\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}", - "exclude_lines_pattern" : "^\"?VendorID\"?,\"?tpep_pickup_datetime\"?,\"?tpep_dropoff_datetime\"?,\"?passenger_count\"?,\"?trip_distance\"?,\"?RatecodeID\"?,\"?store_and_fwd_flag\"?,\"?PULocationID\"?,\"?DOLocationID\"?,\"?payment_type\"?,\"?fare_amount\"?,\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?", - "column_names" : [ <3> - "VendorID", - "tpep_pickup_datetime", - "tpep_dropoff_datetime", - "passenger_count", - "trip_distance", - "RatecodeID", - "store_and_fwd_flag", - "PULocationID", - "DOLocationID", - "payment_type", - "fare_amount", - "extra", - "mta_tax", - "tip_amount", - "tolls_amount", - "improvement_surcharge", - "total_amount" - ], - "has_header_row" : true, <4> - "delimiter" : ",", <5> - "quote" : "\"", <6> - "timestamp_field" : "tpep_pickup_datetime", <7> - "joda_timestamp_formats" : [ <8> - "YYYY-MM-dd HH:mm:ss" - ], - "java_timestamp_formats" : [ <9> - "yyyy-MM-dd HH:mm:ss" - ], - "need_client_timezone" : true, <10> - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date" - }, - "DOLocationID" : { - "type" : "long" - }, - "PULocationID" : { - "type" : "long" - }, - "RatecodeID" : { - "type" : "long" - }, - "VendorID" : { - "type" : "long" - }, - "extra" : { - "type" : "double" - }, - "fare_amount" : { - "type" : "double" - }, - "improvement_surcharge" : { - "type" : "double" - }, - "mta_tax" : { - "type" : "double" - }, - "passenger_count" : { - "type" : "long" - }, - "payment_type" : { - "type" : "long" - }, - "store_and_fwd_flag" : { - "type" : "keyword" - }, - "tip_amount" : { - "type" : "double" - }, - "tolls_amount" : { - "type" : "double" - }, - "total_amount" : { - "type" : "double" - }, - "tpep_dropoff_datetime" : { - "type" : "date", - "format" : "yyyy-MM-dd HH:mm:ss" - }, - "tpep_pickup_datetime" : { - "type" : "date", - "format" : "yyyy-MM-dd HH:mm:ss" - }, - "trip_distance" : { - "type" : "double" - } - } - }, - "ingest_pipeline" : { - "description" : "Ingest pipeline created by text structure finder", - "processors" : [ - { - "csv" : { - "field" : "message", - "target_fields" : [ - "VendorID", - "tpep_pickup_datetime", - "tpep_dropoff_datetime", - "passenger_count", - "trip_distance", - "RatecodeID", - "store_and_fwd_flag", - "PULocationID", - "DOLocationID", - "payment_type", - "fare_amount", - "extra", - "mta_tax", - "tip_amount", - "tolls_amount", - "improvement_surcharge", - "total_amount" - ] - } - }, - { - "date" : { - "field" : "tpep_pickup_datetime", - "timezone" : "{{ event.timezone }}", - "formats" : [ - "yyyy-MM-dd HH:mm:ss" - ] - } - }, - { - "convert" : { - "field" : "DOLocationID", - "type" : "long" - } - }, - { - "convert" : { - "field" : "PULocationID", - "type" : "long" - } - }, - { - "convert" : { - "field" : "RatecodeID", - "type" : "long" - } - }, - { - "convert" : { - "field" : "VendorID", - "type" : "long" - } - }, - { - "convert" : { - "field" : "extra", - "type" : "double" - } - }, - { - "convert" : { - "field" : "fare_amount", - "type" : "double" - } - }, - { - "convert" : { - "field" : "improvement_surcharge", - "type" : "double" - } - }, - { - "convert" : { - "field" : "mta_tax", - "type" : "double" - } - }, - { - "convert" : { - "field" : "passenger_count", - "type" : "long" - } - }, - { - "convert" : { - "field" : "payment_type", - "type" : "long" - } - }, - { - "convert" : { - "field" : "tip_amount", - "type" : "double" - } - }, - { - "convert" : { - "field" : "tolls_amount", - "type" : "double" - } - }, - { - "convert" : { - "field" : "total_amount", - "type" : "double" - } - }, - { - "convert" : { - "field" : "trip_distance", - "type" : "double" - } - }, - { - "remove" : { - "field" : "message" - } - } - ] - }, - "field_stats" : { - "DOLocationID" : { - "count" : 19998, - "cardinality" : 240, - "min_value" : 1, - "max_value" : 265, - "mean_value" : 150.26532653265312, - "median_value" : 148, - "top_hits" : [ - { - "value" : 79, - "count" : 760 - }, - { - "value" : 48, - "count" : 683 - }, - { - "value" : 68, - "count" : 529 - }, - { - "value" : 170, - "count" : 506 - }, - { - "value" : 107, - "count" : 468 - }, - { - "value" : 249, - "count" : 457 - }, - { - "value" : 230, - "count" : 441 - }, - { - "value" : 186, - "count" : 432 - }, - { - "value" : 141, - "count" : 409 - }, - { - "value" : 263, - "count" : 386 - } - ] - }, - "PULocationID" : { - "count" : 19998, - "cardinality" : 154, - "min_value" : 1, - "max_value" : 265, - "mean_value" : 153.4042404240424, - "median_value" : 148, - "top_hits" : [ - { - "value" : 79, - "count" : 1067 - }, - { - "value" : 230, - "count" : 949 - }, - { - "value" : 148, - "count" : 940 - }, - { - "value" : 132, - "count" : 897 - }, - { - "value" : 48, - "count" : 853 - }, - { - "value" : 161, - "count" : 820 - }, - { - "value" : 234, - "count" : 750 - }, - { - "value" : 249, - "count" : 722 - }, - { - "value" : 164, - "count" : 663 - }, - { - "value" : 114, - "count" : 646 - } - ] - }, - "RatecodeID" : { - "count" : 19998, - "cardinality" : 5, - "min_value" : 1, - "max_value" : 5, - "mean_value" : 1.0656565656565653, - "median_value" : 1, - "top_hits" : [ - { - "value" : 1, - "count" : 19311 - }, - { - "value" : 2, - "count" : 468 - }, - { - "value" : 5, - "count" : 195 - }, - { - "value" : 4, - "count" : 17 - }, - { - "value" : 3, - "count" : 7 - } - ] - }, - "VendorID" : { - "count" : 19998, - "cardinality" : 2, - "min_value" : 1, - "max_value" : 2, - "mean_value" : 1.59005900590059, - "median_value" : 2, - "top_hits" : [ - { - "value" : 2, - "count" : 11800 - }, - { - "value" : 1, - "count" : 8198 - } - ] - }, - "extra" : { - "count" : 19998, - "cardinality" : 3, - "min_value" : -0.5, - "max_value" : 0.5, - "mean_value" : 0.4815981598159816, - "median_value" : 0.5, - "top_hits" : [ - { - "value" : 0.5, - "count" : 19281 - }, - { - "value" : 0, - "count" : 698 - }, - { - "value" : -0.5, - "count" : 19 - } - ] - }, - "fare_amount" : { - "count" : 19998, - "cardinality" : 208, - "min_value" : -100, - "max_value" : 300, - "mean_value" : 13.937719771977209, - "median_value" : 9.5, - "top_hits" : [ - { - "value" : 6, - "count" : 1004 - }, - { - "value" : 6.5, - "count" : 935 - }, - { - "value" : 5.5, - "count" : 909 - }, - { - "value" : 7, - "count" : 903 - }, - { - "value" : 5, - "count" : 889 - }, - { - "value" : 7.5, - "count" : 854 - }, - { - "value" : 4.5, - "count" : 802 - }, - { - "value" : 8.5, - "count" : 790 - }, - { - "value" : 8, - "count" : 789 - }, - { - "value" : 9, - "count" : 711 - } - ] - }, - "improvement_surcharge" : { - "count" : 19998, - "cardinality" : 3, - "min_value" : -0.3, - "max_value" : 0.3, - "mean_value" : 0.29915991599159913, - "median_value" : 0.3, - "top_hits" : [ - { - "value" : 0.3, - "count" : 19964 - }, - { - "value" : -0.3, - "count" : 22 - }, - { - "value" : 0, - "count" : 12 - } - ] - }, - "mta_tax" : { - "count" : 19998, - "cardinality" : 3, - "min_value" : -0.5, - "max_value" : 0.5, - "mean_value" : 0.4962246224622462, - "median_value" : 0.5, - "top_hits" : [ - { - "value" : 0.5, - "count" : 19868 - }, - { - "value" : 0, - "count" : 109 - }, - { - "value" : -0.5, - "count" : 21 - } - ] - }, - "passenger_count" : { - "count" : 19998, - "cardinality" : 7, - "min_value" : 0, - "max_value" : 6, - "mean_value" : 1.6201620162016201, - "median_value" : 1, - "top_hits" : [ - { - "value" : 1, - "count" : 14219 - }, - { - "value" : 2, - "count" : 2886 - }, - { - "value" : 5, - "count" : 1047 - }, - { - "value" : 3, - "count" : 804 - }, - { - "value" : 6, - "count" : 523 - }, - { - "value" : 4, - "count" : 406 - }, - { - "value" : 0, - "count" : 113 - } - ] - }, - "payment_type" : { - "count" : 19998, - "cardinality" : 4, - "min_value" : 1, - "max_value" : 4, - "mean_value" : 1.315631563156316, - "median_value" : 1, - "top_hits" : [ - { - "value" : 1, - "count" : 13936 - }, - { - "value" : 2, - "count" : 5857 - }, - { - "value" : 3, - "count" : 160 - }, - { - "value" : 4, - "count" : 45 - } - ] - }, - "store_and_fwd_flag" : { - "count" : 19998, - "cardinality" : 2, - "top_hits" : [ - { - "value" : "N", - "count" : 19910 - }, - { - "value" : "Y", - "count" : 88 - } - ] - }, - "tip_amount" : { - "count" : 19998, - "cardinality" : 717, - "min_value" : 0, - "max_value" : 128, - "mean_value" : 2.010959095909593, - "median_value" : 1.45, - "top_hits" : [ - { - "value" : 0, - "count" : 6917 - }, - { - "value" : 1, - "count" : 1178 - }, - { - "value" : 2, - "count" : 624 - }, - { - "value" : 3, - "count" : 248 - }, - { - "value" : 1.56, - "count" : 206 - }, - { - "value" : 1.46, - "count" : 205 - }, - { - "value" : 1.76, - "count" : 196 - }, - { - "value" : 1.45, - "count" : 195 - }, - { - "value" : 1.36, - "count" : 191 - }, - { - "value" : 1.5, - "count" : 187 - } - ] - }, - "tolls_amount" : { - "count" : 19998, - "cardinality" : 26, - "min_value" : 0, - "max_value" : 35, - "mean_value" : 0.2729697969796978, - "median_value" : 0, - "top_hits" : [ - { - "value" : 0, - "count" : 19107 - }, - { - "value" : 5.76, - "count" : 791 - }, - { - "value" : 10.5, - "count" : 36 - }, - { - "value" : 2.64, - "count" : 21 - }, - { - "value" : 11.52, - "count" : 8 - }, - { - "value" : 5.54, - "count" : 4 - }, - { - "value" : 8.5, - "count" : 4 - }, - { - "value" : 17.28, - "count" : 4 - }, - { - "value" : 2, - "count" : 2 - }, - { - "value" : 2.16, - "count" : 2 - } - ] - }, - "total_amount" : { - "count" : 19998, - "cardinality" : 1267, - "min_value" : -100.3, - "max_value" : 389.12, - "mean_value" : 17.499898989898995, - "median_value" : 12.35, - "top_hits" : [ - { - "value" : 7.3, - "count" : 478 - }, - { - "value" : 8.3, - "count" : 443 - }, - { - "value" : 8.8, - "count" : 420 - }, - { - "value" : 6.8, - "count" : 406 - }, - { - "value" : 7.8, - "count" : 405 - }, - { - "value" : 6.3, - "count" : 371 - }, - { - "value" : 9.8, - "count" : 368 - }, - { - "value" : 5.8, - "count" : 362 - }, - { - "value" : 9.3, - "count" : 332 - }, - { - "value" : 10.3, - "count" : 332 - } - ] - }, - "tpep_dropoff_datetime" : { - "count" : 19998, - "cardinality" : 9066, - "earliest" : "2018-05-31 06:18:15", - "latest" : "2018-06-02 02:25:44", - "top_hits" : [ - { - "value" : "2018-06-01 01:12:12", - "count" : 10 - }, - { - "value" : "2018-06-01 00:32:15", - "count" : 9 - }, - { - "value" : "2018-06-01 00:44:27", - "count" : 9 - }, - { - "value" : "2018-06-01 00:46:42", - "count" : 9 - }, - { - "value" : "2018-06-01 01:03:22", - "count" : 9 - }, - { - "value" : "2018-06-01 01:05:13", - "count" : 9 - }, - { - "value" : "2018-06-01 00:11:20", - "count" : 8 - }, - { - "value" : "2018-06-01 00:16:03", - "count" : 8 - }, - { - "value" : "2018-06-01 00:19:47", - "count" : 8 - }, - { - "value" : "2018-06-01 00:25:17", - "count" : 8 - } - ] - }, - "tpep_pickup_datetime" : { - "count" : 19998, - "cardinality" : 8760, - "earliest" : "2018-05-31 06:08:31", - "latest" : "2018-06-02 01:21:21", - "top_hits" : [ - { - "value" : "2018-06-01 00:01:23", - "count" : 12 - }, - { - "value" : "2018-06-01 00:04:31", - "count" : 10 - }, - { - "value" : "2018-06-01 00:05:38", - "count" : 10 - }, - { - "value" : "2018-06-01 00:09:50", - "count" : 10 - }, - { - "value" : "2018-06-01 00:12:01", - "count" : 10 - }, - { - "value" : "2018-06-01 00:14:17", - "count" : 10 - }, - { - "value" : "2018-06-01 00:00:34", - "count" : 9 - }, - { - "value" : "2018-06-01 00:00:40", - "count" : 9 - }, - { - "value" : "2018-06-01 00:02:53", - "count" : 9 - }, - { - "value" : "2018-06-01 00:05:40", - "count" : 9 - } - ] - }, - "trip_distance" : { - "count" : 19998, - "cardinality" : 1687, - "min_value" : 0, - "max_value" : 64.63, - "mean_value" : 3.6521062106210715, - "median_value" : 2.16, - "top_hits" : [ - { - "value" : 0.9, - "count" : 335 - }, - { - "value" : 0.8, - "count" : 320 - }, - { - "value" : 1.1, - "count" : 316 - }, - { - "value" : 0.7, - "count" : 304 - }, - { - "value" : 1.2, - "count" : 303 - }, - { - "value" : 1, - "count" : 296 - }, - { - "value" : 1.3, - "count" : 280 - }, - { - "value" : 1.5, - "count" : 268 - }, - { - "value" : 1.6, - "count" : 268 - }, - { - "value" : 0.6, - "count" : 256 - } - ] - } - } -} ----- -// NOTCONSOLE - -<1> `num_messages_analyzed` is 2 lower than `num_lines_analyzed` because only - data records count as messages. The first line contains the column names - and in this sample the second line is blank. -<2> Unlike the first example, in this case the `format` has been identified as - `delimited`. -<3> Because the `format` is `delimited`, the `column_names` field in the output - lists the column names in the order they appear in the sample. -<4> `has_header_row` indicates that for this sample the column names were in - the first row of the sample. (If they hadn't been then it would have been - a good idea to specify them in the `column_names` query parameter.) -<5> The `delimiter` for this sample is a comma, as it's a CSV file. -<6> The `quote` character is the default double quote. (The structure finder - does not attempt to deduce any other quote character, so if you have a - delimited file that's quoted with some other character you must specify it - using the `quote` query parameter.) -<7> The `timestamp_field` has been chosen to be `tpep_pickup_datetime`. - `tpep_dropoff_datetime` would work just as well, but `tpep_pickup_datetime` - was chosen because it comes first in the column order. If you prefer - `tpep_dropoff_datetime` then force it to be chosen using the - `timestamp_field` query parameter. -<8> `joda_timestamp_formats` are used to tell Logstash how to parse timestamps. -<9> `java_timestamp_formats` are the Java time formats recognized in the time - fields. Elasticsearch mappings and Ingest pipeline use this format. -<10> The timestamp format in this sample doesn't specify a timezone, so to - accurately convert them to UTC timestamps to store in Elasticsearch it's - necessary to supply the timezone they relate to. `need_client_timezone` - will be `false` for timestamp formats that include the timezone. - - -[[ml-find-file-structure-example-timeout]] -=== Setting the timeout parameter - -If you try to analyze a lot of data then the analysis will take a long time. -If you want to limit the amount of processing your {es} cluster performs for -a request, use the `timeout` query parameter. The analysis will be aborted and -an error returned when the timeout expires. For example, you can replace 20000 -lines in the previous example with 200000 and set a 1 second timeout on the -analysis: - -[source,js] ----- -curl -s "s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2018-06.csv" | head -200000 | curl -s -H "Content-Type: application/json" -XPOST "localhost:9200/_ml/find_file_structure?pretty&lines_to_sample=200000&timeout=1s" -T - ----- -// NOTCONSOLE -// Not converting to console because this shows how curl can be used - -Unless you are using an incredibly fast computer you'll receive a timeout error: - -[source,js] ----- -{ - "error" : { - "root_cause" : [ - { - "type" : "timeout_exception", - "reason" : "Aborting structure analysis during [delimited record parsing] as it has taken longer than the timeout of [1s]" - } - ], - "type" : "timeout_exception", - "reason" : "Aborting structure analysis during [delimited record parsing] as it has taken longer than the timeout of [1s]" - }, - "status" : 500 -} ----- -// NOTCONSOLE - --- -NOTE: If you try the example above yourself you will note that the overall -running time of the `curl` commands is considerably longer than 1 second. This -is because it takes a while to download 200000 lines of CSV from the internet, -and the timeout is measured from the time this endpoint starts to process the -data. --- - - -[[ml-find-file-structure-example-eslog]] -=== Analyzing {es} log files - -This is an example of analyzing {es}'s own log file: - -[source,js] ----- -curl -s -H "Content-Type: application/json" -XPOST "localhost:9200/_ml/find_file_structure?pretty" -T "$ES_HOME/logs/elasticsearch.log" ----- -// NOTCONSOLE -// Not converting to console because this shows how curl can be used - -If the request does not encounter errors, the result will look something like -this: - -[source,js] ----- -{ - "num_lines_analyzed" : 53, - "num_messages_analyzed" : 53, - "sample_start" : "[2018-09-27T14:39:28,518][INFO ][o.e.e.NodeEnvironment ] [node-0] using [1] data paths, mounts [[/ (/dev/disk1)]], net usable_space [165.4gb], net total_space [464.7gb], types [hfs]\n[2018-09-27T14:39:28,521][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [494.9mb], compressed ordinary object pointers [true]\n", - "charset" : "UTF-8", - "has_byte_order_marker" : false, - "format" : "semi_structured_text", <1> - "multiline_start_pattern" : "^\\[\\b\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}", <2> - "grok_pattern" : "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel}.*", <3> - "timestamp_field" : "timestamp", - "joda_timestamp_formats" : [ - "ISO8601" - ], - "java_timestamp_formats" : [ - "ISO8601" - ], - "need_client_timezone" : true, - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date" - }, - "loglevel" : { - "type" : "keyword" - }, - "message" : { - "type" : "text" - } - } - }, - "ingest_pipeline" : { - "description" : "Ingest pipeline created by text structure finder", - "processors" : [ - { - "grok" : { - "field" : "message", - "patterns" : [ - "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel}.*" - ] - } - }, - { - "date" : { - "field" : "timestamp", - "timezone" : "{{ event.timezone }}", - "formats" : [ - "ISO8601" - ] - } - }, - { - "remove" : { - "field" : "timestamp" - } - } - ] - }, - "field_stats" : { - "loglevel" : { - "count" : 53, - "cardinality" : 3, - "top_hits" : [ - { - "value" : "INFO", - "count" : 51 - }, - { - "value" : "DEBUG", - "count" : 1 - }, - { - "value" : "WARN", - "count" : 1 - } - ] - }, - "timestamp" : { - "count" : 53, - "cardinality" : 28, - "earliest" : "2018-09-27T14:39:28,518", - "latest" : "2018-09-27T14:39:37,012", - "top_hits" : [ - { - "value" : "2018-09-27T14:39:29,859", - "count" : 10 - }, - { - "value" : "2018-09-27T14:39:29,860", - "count" : 9 - }, - { - "value" : "2018-09-27T14:39:29,858", - "count" : 6 - }, - { - "value" : "2018-09-27T14:39:28,523", - "count" : 3 - }, - { - "value" : "2018-09-27T14:39:34,234", - "count" : 2 - }, - { - "value" : "2018-09-27T14:39:28,518", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:28,521", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:28,522", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:29,861", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:32,786", - "count" : 1 - } - ] - } - } -} ----- -// NOTCONSOLE - -<1> This time the `format` has been identified as `semi_structured_text`. -<2> The `multiline_start_pattern` is set on the basis that the timestamp appears - in the first line of each multi-line log message. -<3> A very simple `grok_pattern` has been created, which extracts the timestamp - and recognizable fields that appear in every analyzed message. In this case - the only field that was recognized beyond the timestamp was the log level. - - -[[ml-find-file-structure-example-grok]] -=== Specifying `grok_pattern` as query parameter - -If you recognize more fields than the simple `grok_pattern` produced by the -structure finder unaided then you can resubmit the request specifying a more -advanced `grok_pattern` as a query parameter and the structure finder will -calculate `field_stats` for your additional fields. - -In the case of the {es} log a more complete Grok pattern is -`\[%{TIMESTAMP_ISO8601:timestamp}\]\[%{LOGLEVEL:loglevel} *\]\[%{JAVACLASS:class} *\] \[%{HOSTNAME:node}\] %{JAVALOGMESSAGE:message}`. -You can analyze the same log file again, submitting this `grok_pattern` as a -query parameter (appropriately URL escaped): - -[source,js] ----- -curl -s -H "Content-Type: application/json" -XPOST "localhost:9200/_ml/find_file_structure?pretty&format=semi_structured_text&grok_pattern=%5C%5B%25%7BTIMESTAMP_ISO8601:timestamp%7D%5C%5D%5C%5B%25%7BLOGLEVEL:loglevel%7D%20*%5C%5D%5C%5B%25%7BJAVACLASS:class%7D%20*%5C%5D%20%5C%5B%25%7BHOSTNAME:node%7D%5C%5D%20%25%7BJAVALOGMESSAGE:message%7D" -T "$ES_HOME/logs/elasticsearch.log" ----- -// NOTCONSOLE -// Not converting to console because this shows how curl can be used - -If the request does not encounter errors, the result will look something like -this: - -[source,js] ----- -{ - "num_lines_analyzed" : 53, - "num_messages_analyzed" : 53, - "sample_start" : "[2018-09-27T14:39:28,518][INFO ][o.e.e.NodeEnvironment ] [node-0] using [1] data paths, mounts [[/ (/dev/disk1)]], net usable_space [165.4gb], net total_space [464.7gb], types [hfs]\n[2018-09-27T14:39:28,521][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [494.9mb], compressed ordinary object pointers [true]\n", - "charset" : "UTF-8", - "has_byte_order_marker" : false, - "format" : "semi_structured_text", - "multiline_start_pattern" : "^\\[\\b\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}", - "grok_pattern" : "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} *\\]\\[%{JAVACLASS:class} *\\] \\[%{HOSTNAME:node}\\] %{JAVALOGMESSAGE:message}", <1> - "timestamp_field" : "timestamp", - "joda_timestamp_formats" : [ - "ISO8601" - ], - "java_timestamp_formats" : [ - "ISO8601" - ], - "need_client_timezone" : true, - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date" - }, - "class" : { - "type" : "keyword" - }, - "loglevel" : { - "type" : "keyword" - }, - "message" : { - "type" : "text" - }, - "node" : { - "type" : "keyword" - } - } - }, - "ingest_pipeline" : { - "description" : "Ingest pipeline created by text structure finder", - "processors" : [ - { - "grok" : { - "field" : "message", - "patterns" : [ - "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} *\\]\\[%{JAVACLASS:class} *\\] \\[%{HOSTNAME:node}\\] %{JAVALOGMESSAGE:message}" - ] - } - }, - { - "date" : { - "field" : "timestamp", - "timezone" : "{{ event.timezone }}", - "formats" : [ - "ISO8601" - ] - } - }, - { - "remove" : { - "field" : "timestamp" - } - } - ] - }, - "field_stats" : { <2> - "class" : { - "count" : 53, - "cardinality" : 14, - "top_hits" : [ - { - "value" : "o.e.p.PluginsService", - "count" : 26 - }, - { - "value" : "o.e.c.m.MetadataIndexTemplateService", - "count" : 8 - }, - { - "value" : "o.e.n.Node", - "count" : 7 - }, - { - "value" : "o.e.e.NodeEnvironment", - "count" : 2 - }, - { - "value" : "o.e.a.ActionModule", - "count" : 1 - }, - { - "value" : "o.e.c.s.ClusterApplierService", - "count" : 1 - }, - { - "value" : "o.e.c.s.MasterService", - "count" : 1 - }, - { - "value" : "o.e.d.DiscoveryModule", - "count" : 1 - }, - { - "value" : "o.e.g.GatewayService", - "count" : 1 - }, - { - "value" : "o.e.l.LicenseService", - "count" : 1 - } - ] - }, - "loglevel" : { - "count" : 53, - "cardinality" : 3, - "top_hits" : [ - { - "value" : "INFO", - "count" : 51 - }, - { - "value" : "DEBUG", - "count" : 1 - }, - { - "value" : "WARN", - "count" : 1 - } - ] - }, - "message" : { - "count" : 53, - "cardinality" : 53, - "top_hits" : [ - { - "value" : "Using REST wrapper from plugin org.elasticsearch.xpack.security.Security", - "count" : 1 - }, - { - "value" : "adding template [.monitoring-alerts] for index patterns [.monitoring-alerts-6]", - "count" : 1 - }, - { - "value" : "adding template [.monitoring-beats] for index patterns [.monitoring-beats-6-*]", - "count" : 1 - }, - { - "value" : "adding template [.monitoring-es] for index patterns [.monitoring-es-6-*]", - "count" : 1 - }, - { - "value" : "adding template [.monitoring-kibana] for index patterns [.monitoring-kibana-6-*]", - "count" : 1 - }, - { - "value" : "adding template [.monitoring-logstash] for index patterns [.monitoring-logstash-6-*]", - "count" : 1 - }, - { - "value" : "adding template [.triggered_watches] for index patterns [.triggered_watches*]", - "count" : 1 - }, - { - "value" : "adding template [.watch-history-9] for index patterns [.watcher-history-9*]", - "count" : 1 - }, - { - "value" : "adding template [.watches] for index patterns [.watches*]", - "count" : 1 - }, - { - "value" : "starting ...", - "count" : 1 - } - ] - }, - "node" : { - "count" : 53, - "cardinality" : 1, - "top_hits" : [ - { - "value" : "node-0", - "count" : 53 - } - ] - }, - "timestamp" : { - "count" : 53, - "cardinality" : 28, - "earliest" : "2018-09-27T14:39:28,518", - "latest" : "2018-09-27T14:39:37,012", - "top_hits" : [ - { - "value" : "2018-09-27T14:39:29,859", - "count" : 10 - }, - { - "value" : "2018-09-27T14:39:29,860", - "count" : 9 - }, - { - "value" : "2018-09-27T14:39:29,858", - "count" : 6 - }, - { - "value" : "2018-09-27T14:39:28,523", - "count" : 3 - }, - { - "value" : "2018-09-27T14:39:34,234", - "count" : 2 - }, - { - "value" : "2018-09-27T14:39:28,518", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:28,521", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:28,522", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:29,861", - "count" : 1 - }, - { - "value" : "2018-09-27T14:39:32,786", - "count" : 1 - } - ] - } - } -} ----- -// NOTCONSOLE - -<1> The `grok_pattern` in the output is now the overridden one supplied in the - query parameter. -<2> The returned `field_stats` include entries for the fields from the - overridden `grok_pattern`. - -The URL escaping is hard, so if you are working interactively it is best to use -the {ml} UI! diff --git a/docs/reference/ml/anomaly-detection/apis/index.asciidoc b/docs/reference/ml/anomaly-detection/apis/index.asciidoc index c38996476b707..eebae76f9b9cf 100644 --- a/docs/reference/ml/anomaly-detection/apis/index.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/index.asciidoc @@ -21,8 +21,6 @@ include::delete-snapshot.asciidoc[leveloffset=+2] include::delete-expired-data.asciidoc[leveloffset=+2] //ESTIMATE include::estimate-model-memory.asciidoc[leveloffset=+2] -//FIND -include::find-file-structure.asciidoc[leveloffset=+2] //FLUSH include::flush-job.asciidoc[leveloffset=+2] //FORECAST diff --git a/docs/reference/ml/anomaly-detection/apis/ml-apis.asciidoc b/docs/reference/ml/anomaly-detection/apis/ml-apis.asciidoc index d401d96a5ac0e..8c48a4a0e5696 100644 --- a/docs/reference/ml/anomaly-detection/apis/ml-apis.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/ml-apis.asciidoc @@ -69,12 +69,6 @@ See also <>. * <> * <> -[discrete] -[[ml-api-file-structure-endpoint]] -== File structure - -* <> - [discrete] [[ml-api-ml-info-endpoint]] == Info