Skip to content

Commit 41df492

Browse files
authored
update tantivy (#1237)
update tantivy and replace tantivy::chrono dependency
1 parent 7afa892 commit 41df492

File tree

12 files changed

+58
-40
lines changed

12 files changed

+58
-40
lines changed

Cargo.lock

Lines changed: 10 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ quickwit-storage = { version = "0.2.1", path = "../quickwit-storage" }
2424
tokio = { version = "1", features = ["full"] }
2525
tokio-util = { version = "0.7", features = ["full"] }
2626
rand = "0.8"
27-
tantivy = { git= "https://github.com/quickwit-oss/tantivy", rev="46d5de9", default-features=false, features = ["mmap", "lz4-compression", "quickwit"] }
27+
tantivy = { git= "https://github.com/quickwit-oss/tantivy", rev="447811c", default-features=false, features = ["mmap", "lz4-compression", "quickwit"] }
2828
futures = "0.3"
2929
futures-util = { version = "0.3.1", default-features = false }
3030
uuid = "0.8"

quickwit-directories/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ futures = "0.3"
1616
serde = "1"
1717
serde_cbor = "0.11"
1818
serde_json = "1"
19-
tantivy = { git= "https://github.com/quickwit-oss/tantivy", rev="46d5de9", default-features=false, features = ["mmap", "lz4-compression", "quickwit"] }
19+
tantivy = { git= "https://github.com/quickwit-oss/tantivy", rev="447811c", default-features=false, features = ["mmap", "lz4-compression", "quickwit"] }
2020
quickwit-storage = { version = "0.2.1", path = "../quickwit-storage" }
2121
uuid = "0.8"
2222
once_cell = "1"
@@ -25,7 +25,7 @@ tracing = "0.1.29"
2525
thiserror = "1"
2626
anyhow = "1"
2727
async-trait = "0.1"
28-
chrono = "0.4"
28+
time = { version = "0.3.7", features = ["std"] }
2929

3030
[dev-dependencies]
3131
tempfile = '3'

quickwit-directories/src/debug_proxy_directory.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,10 @@ use std::time::{Duration, Instant};
2424
use std::{fmt, io, mem};
2525

2626
use async_trait::async_trait;
27-
use tantivy::chrono::{DateTime, Utc};
2827
use tantivy::directory::error::OpenReadError;
2928
use tantivy::directory::{FileHandle, OwnedBytes};
3029
use tantivy::{Directory, HasLen};
30+
use time::OffsetDateTime;
3131

3232
use crate::StorageDirectory;
3333

@@ -63,14 +63,14 @@ pub struct ReadOperation {
6363
pub offset: usize,
6464
/// The number of bytes fetched
6565
pub num_bytes: usize,
66-
/// The date at which the operation was performed.
67-
pub start_date: DateTime<Utc>,
66+
/// The date at which the operation was performed (UTC timezone).
67+
pub start_date: OffsetDateTime,
6868
/// The elapsed time to run the read operation.
6969
pub duration: Duration,
7070
}
7171

7272
struct ReadOperationBuilder {
73-
start_date: DateTime<Utc>,
73+
start_date: OffsetDateTime,
7474
start_instant: Instant,
7575
path: PathBuf,
7676
offset: usize,
@@ -79,7 +79,7 @@ struct ReadOperationBuilder {
7979
impl ReadOperationBuilder {
8080
pub fn new(path: &Path) -> Self {
8181
let start_instant = Instant::now();
82-
let start_date = Utc::now();
82+
let start_date = OffsetDateTime::now_utc();
8383
ReadOperationBuilder {
8484
start_date,
8585
start_instant,

quickwit-doc-mapper/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@ once_cell = "1.10"
1919
regex = "1"
2020
serde = { version = "1.0", features = ["derive"] }
2121
serde_json = "1.0"
22-
tantivy = { git = "https://github.com/quickwit-oss/tantivy", rev = "46d5de9", default-features = false, features = ["mmap", "lz4-compression", "quickwit"] }
23-
tantivy-query-grammar = { git = "https://github.com/quickwit-oss/tantivy/", rev = "46d5de9" }
22+
tantivy = { git = "https://github.com/quickwit-oss/tantivy", rev = "447811c", default-features = false, features = ["mmap", "lz4-compression", "quickwit"] }
23+
tantivy-query-grammar = { git = "https://github.com/quickwit-oss/tantivy/", rev = "447811c" }
2424
thiserror = "1.0"
2525
tracing = "0.1.29"
2626
typetag = "0.1"

quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -475,6 +475,7 @@ mod tests {
475475
{
476476
"timestamp": 1586960586000,
477477
"body": "20200415T072306-0700 INFO This is a great log",
478+
"response_date2": "2021-12-19T16:39:57+00:00",
478479
"response_date": "2021-12-19T16:39:57Z",
479480
"response_time": 2.3,
480481
"response_payload": "YWJj",
@@ -490,7 +491,7 @@ mod tests {
490491
const EXPECTED_JSON_PATHS_AND_VALUES: &str = r#"{
491492
"timestamp": [1586960586000],
492493
"body": ["20200415T072306-0700 INFO This is a great log"],
493-
"response_date": ["2021-12-19T16:39:57+00:00"],
494+
"response_date": ["2021-12-19T16:39:57Z"],
494495
"response_time": [2.3],
495496
"response_payload": [[97,98,99]],
496497
"owner": ["foo"],
@@ -567,6 +568,12 @@ mod tests {
567568
.iter()
568569
.map(|expected_value| format!("{}", expected_value))
569570
.any(|expected_value| expected_value == value);
571+
if !is_value_in_expected_values {
572+
panic!(
573+
"Could not find: {:?} in {:?}",
574+
value, expected_json_paths_and_values
575+
);
576+
}
570577
assert!(is_value_in_expected_values);
571578
}
572579
});

quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,16 @@
2020
use std::convert::TryFrom;
2121

2222
use anyhow::bail;
23-
use chrono::{FixedOffset, Utc};
2423
use itertools::{process_results, Itertools};
2524
use serde::{Deserialize, Serialize};
2625
use serde_json::{self, Value as JsonValue};
2726
use tantivy::schema::{
2827
BytesOptions, Cardinality, DocParsingError as TantivyDocParser, FieldType, IndexRecordOption,
2928
NumericOptions, TextFieldIndexing, TextOptions, Value,
3029
};
30+
use tantivy::time::format_description::well_known::Rfc3339;
31+
use tantivy::time::OffsetDateTime;
32+
use tantivy::DateTime;
3133
use thiserror::Error;
3234

3335
use super::{default_as_true, FieldMappingType};
@@ -335,17 +337,16 @@ impl FieldMappingEntry {
335337
)?
336338
}
337339
JsonValue::String(value_as_str) => {
338-
let dt_with_fixed_tz: chrono::DateTime<FixedOffset> =
339-
chrono::DateTime::parse_from_rfc3339(&value_as_str).map_err(|err| {
340+
let date_time_utc = DateTime::new_utc(
341+
OffsetDateTime::parse(&value_as_str, &Rfc3339).map_err(|err| {
340342
DocParsingError::ValueError(
341343
self.name.clone(),
342344
format!("Expected RFC 3339 date, got '{}'. {:?}", value_as_str, err),
343345
)
344-
})?;
345-
vec![(
346-
FieldPath::new(&self.name),
347-
Value::Date(dt_with_fixed_tz.with_timezone(&Utc)),
348-
)]
346+
})?,
347+
);
348+
349+
vec![(FieldPath::new(&self.name), Value::Date(date_time_utc))]
349350
}
350351
JsonValue::Null => {
351352
vec![]
@@ -747,10 +748,11 @@ impl From<TantivyDocParser> for DocParsingError {
747748
#[cfg(test)]
748749
mod tests {
749750
use anyhow::bail;
750-
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
751751
use matches::matches;
752752
use serde_json::json;
753753
use tantivy::schema::{Cardinality, Value};
754+
use tantivy::time::{Date, Month, PrimitiveDateTime, Time};
755+
use tantivy::DateTime;
754756

755757
use super::FieldMappingEntry;
756758
use crate::default_doc_mapper::FieldMappingType;
@@ -1332,11 +1334,14 @@ mod tests {
13321334

13331335
// Successful parsing
13341336
let parsed_value = entry.parse(json!("2021-12-19T16:39:57-01:00"))?;
1335-
let datetime = NaiveDateTime::new(
1336-
NaiveDate::from_ymd(2021, 12, 19),
1337-
NaiveTime::from_hms(17, 39, 57),
1337+
1338+
let datetime = PrimitiveDateTime::new(
1339+
Date::from_calendar_date(2021, Month::December, 19).unwrap(),
1340+
Time::from_hms(17, 39, 57).unwrap(),
13381341
);
1339-
let datetime_utc = Utc.from_utc_datetime(&datetime);
1342+
// let datetime = datetime!(2021-12-19 17:39:57);
1343+
1344+
let datetime_utc = DateTime::new_primitive(datetime); // Utc.from_utc_datetime(&datetime);
13401345
assert_eq!(parsed_value.len(), 1);
13411346
assert_eq!(parsed_value[0].1, Value::Date(datetime_utc));
13421347

quickwit-indexing/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,14 +34,16 @@ rusoto_kinesis = { version = "0.47", default-features = false, features = ["rust
3434
serde = "1"
3535
serde_json = "1"
3636
serde_yaml = "0.8"
37-
tantivy = { git= "https://github.com/quickwit-oss/tantivy", rev="46d5de9", default-features=false, features = ["mmap", "lz4-compression", "quickwit"] }
37+
tantivy = { git= "https://github.com/quickwit-oss/tantivy", rev="447811c", default-features=false, features = ["mmap", "lz4-compression", "quickwit"] }
3838
tempfile = "3.3"
3939
thiserror = "1"
4040
tokio = { version = "1", features = ["sync"] }
4141
tracing = "0.1.29"
4242
ulid = "0.5"
4343
tokio-stream = "0.1"
4444
arc-swap = "1.4"
45+
time = { version = "0.3.7", features = ["std"] }
46+
4547

4648
[features]
4749
kafka = ["rdkafka"]

quickwit-indexing/src/actors/uploader.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ use itertools::Itertools;
3131
use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity};
3232
use quickwit_metastore::{Metastore, SplitMetadata};
3333
use quickwit_storage::SplitPayloadBuilder;
34-
use tantivy::chrono::Utc;
34+
use time::OffsetDateTime;
3535
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
3636
use tracing::{info, info_span, warn, Instrument, Span};
3737

@@ -194,7 +194,7 @@ fn create_split_metadata(split: &PackagedSplit, footer_offsets: Range<u64>) -> S
194194
num_docs: split.num_docs as usize,
195195
time_range: split.time_range.clone(),
196196
original_size_in_bytes: split.size_in_bytes,
197-
create_timestamp: Utc::now().timestamp(),
197+
create_timestamp: OffsetDateTime::now_utc().unix_timestamp(),
198198
tags: split.tags.clone(),
199199
demux_num_ops: split.demux_num_ops,
200200
footer_offsets,

quickwit-indexing/src/garbage_collection.rs

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,8 @@ use futures::StreamExt;
2424
use quickwit_actors::ActorContext;
2525
use quickwit_metastore::{Metastore, MetastoreError, SplitMetadata, SplitState};
2626
use quickwit_storage::StorageError;
27-
use tantivy::chrono::Utc;
2827
use thiserror::Error;
28+
use time::OffsetDateTime;
2929
use tracing::error;
3030

3131
use crate::actors::GarbageCollector;
@@ -83,7 +83,8 @@ pub async fn run_garbage_collect(
8383
ctx_opt: Option<&ActorContext<GarbageCollector>>,
8484
) -> anyhow::Result<Vec<FileEntry>> {
8585
// Select staged splits with staging timestamp older than grace period timestamp.
86-
let grace_period_timestamp = Utc::now().timestamp() - staged_grace_period.as_secs() as i64;
86+
let grace_period_timestamp =
87+
OffsetDateTime::now_utc().unix_timestamp() - staged_grace_period.as_secs() as i64;
8788

8889
let deletable_staged_splits: Vec<SplitMetadata> = metastore
8990
.list_splits(index_id, SplitState::Staged, None, None)
@@ -123,7 +124,8 @@ pub async fn run_garbage_collect(
123124
.await?;
124125

125126
// We wait another 2 minutes until the split is actually deleted.
126-
let grace_period_deletion = Utc::now().timestamp() - deletion_grace_period.as_secs() as i64;
127+
let grace_period_deletion =
128+
OffsetDateTime::now_utc().unix_timestamp() - deletion_grace_period.as_secs() as i64;
127129
let splits_to_delete = metastore
128130
.list_splits(index_id, SplitState::MarkedForDeletion, None, None)
129131
.await?

0 commit comments

Comments
 (0)