Skip to content

Commit 6f154b4

Browse files
author
Devdutt Shenoi
committed
refactor: restructure as a library
1 parent bfca663 commit 6f154b4

File tree

126 files changed

+1332
-168
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

126 files changed

+1332
-168
lines changed

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
target
22
data*
3-
staging*
3+
staging/
44
limitcache
55
examples
66
cert.pem
@@ -14,4 +14,3 @@ parseable
1414
parseable_*
1515
parseable-env-secret
1616
cache
17-

Cargo.toml

Lines changed: 129 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,129 @@
1-
[workspace]
2-
members = ["server"]
3-
resolver = "2"
1+
[package]
2+
name = "parseable"
3+
version = "1.6.2"
4+
authors = ["Parseable Team <[email protected]>"]
5+
edition = "2021"
6+
rust-version = "1.77.1"
7+
categories = ["logging", "observability", "log analytics"]
8+
build = "build.rs"
9+
10+
[dependencies]
11+
### apache arrow/datafusion dependencies
12+
# arrow = "51.0.0"
13+
arrow-schema = { version = "53.0.0", features = ["serde"] }
14+
arrow-array = { version = "53.0.0" }
15+
arrow-json = "53.0.0"
16+
arrow-ipc = { version = "53.0.0", features = ["zstd"] }
17+
arrow-select = "53.0.0"
18+
datafusion = "42.0.0"
19+
object_store = { version = "0.11.1", features = ["cloud", "aws", "azure"] }
20+
parquet = "53.0.0"
21+
arrow-flight = { version = "53.0.0", features = [ "tls" ] }
22+
tonic = {version = "0.12.3", features = ["tls", "transport", "gzip", "zstd"] }
23+
tonic-web = "0.12.3"
24+
tower-http = { version = "0.6.1", features = ["cors"] }
25+
26+
### actix dependencies
27+
actix-web-httpauth = "0.8"
28+
actix-web = { version = "4.9.0", features = ["rustls-0_22"] }
29+
actix-cors = "0.7.0"
30+
actix-web-prometheus = { version = "0.1" }
31+
actix-web-static-files = "4.0"
32+
mime = "0.3.17"
33+
34+
### other dependencies
35+
anyhow = { version = "1.0", features = ["backtrace"] }
36+
argon2 = "0.5.0"
37+
async-trait = "0.1.82"
38+
base64 = "0.22.0"
39+
lazy_static = "1.4"
40+
bytes = "1.4"
41+
byteorder = "1.4.3"
42+
bzip2 = { version = "*", features = ["static"] }
43+
cookie = "0.18.1"
44+
chrono = "0.4"
45+
chrono-humanize = "0.2"
46+
clap = { version = "4.1", default-features = false, features = [
47+
"std",
48+
"color",
49+
"help",
50+
"derive",
51+
"env",
52+
"cargo",
53+
"error-context",
54+
] }
55+
clokwerk = "0.4"
56+
crossterm = "0.28.1"
57+
derive_more = "0.99.18"
58+
env_logger = "0.11.3"
59+
fs_extra = "1.3"
60+
futures = "0.3"
61+
futures-util = "0.3.28"
62+
hex = "0.4"
63+
hostname = "0.4.0"
64+
http = "0.2.7"
65+
humantime-serde = "1.1"
66+
itertools = "0.13.0"
67+
log = "0.4"
68+
num_cpus = "1.15"
69+
once_cell = "1.17.1"
70+
prometheus = { version = "0.13", features = ["process"] }
71+
rand = "0.8.5"
72+
regex = "1.7.3"
73+
relative-path = { version = "1.7", features = ["serde"] }
74+
reqwest = { version = "0.11.27", default-features = false, features = [
75+
"rustls-tls",
76+
"json",
77+
] } # cannot update because rustls is not on the latest version; see the `rustls` entry below
78+
rustls = "0.22.4" # cannot update to 0.23 because actix has not caught up yet
79+
rustls-pemfile = "2.1.2"
80+
semver = "1.0"
81+
serde = { version = "1.0", features = ["rc", "derive"] }
82+
serde_json = "1.0"
83+
static-files = "0.2"
84+
sysinfo = "0.31.4"
85+
thiserror = "1.0.64"
86+
thread-priority = "1.0.0"
87+
tokio = { version = "1.28", default-features = false, features = [
88+
"sync",
89+
"macros",
90+
"fs",
91+
] }
92+
tokio-stream = { version = "0.1", features = ["fs"] }
93+
ulid = { version = "1.0", features = ["serde"] }
94+
uptime_lib = "0.3.0"
95+
xxhash-rust = { version = "0.8", features = ["xxh3"] }
96+
xz2 = { version = "*", features = ["static"] }
97+
nom = "7.1.3"
98+
humantime = "2.1.0"
99+
human-size = "0.4"
100+
openid = { version = "0.15.0", default-features = false, features = ["rustls"] }
101+
url = "2.4.0"
102+
http-auth-basic = "0.3.3"
103+
serde_repr = "0.1.17"
104+
hashlru = { version = "0.11.0", features = ["serde"] }
105+
path-clean = "1.0.1"
106+
prost = "0.13.3"
107+
prometheus-parse = "0.2.5"
108+
sha2 = "0.10.8"
109+
110+
[build-dependencies]
111+
cargo_toml = "0.20.1"
112+
sha1_smol = { version = "1.0", features = ["std"] }
113+
static-files = "0.2"
114+
ureq = "2.6"
115+
vergen = { version = "8.1", features = ["build", "git", "cargo", "gitcl"] }
116+
zip = { version = "2.2.0", default-features = false, features = ["deflate"] }
117+
url = "2.4.0"
118+
prost-build = "0.13.3"
119+
120+
[dev-dependencies]
121+
maplit = "1.0"
122+
rstest = "0.23.0"
123+
124+
[package.metadata.parseable_ui]
125+
assets-url = "https://github.com/parseablehq/console/releases/download/v0.9.11/build.zip"
126+
assets-sha1 = "3f0c0f0e9fe23c6a01f0eb45115da4bfe29f9c3f"
127+
128+
[features]
129+
debug = []
File renamed without changes.

server/Cargo.toml

Lines changed: 0 additions & 129 deletions
This file was deleted.

server/src/query/stream_schema_provider.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ use crate::catalog::Snapshot as CatalogSnapshot;
7575

7676
// schema provider for stream based on global data
7777
pub struct GlobalSchemaProvider {
78-
pub storage: Arc<dyn ObjectStorage + Send>,
78+
pub storage: Arc<dyn ObjectStorage>,
7979
}
8080

8181
#[async_trait::async_trait]
@@ -317,17 +317,24 @@ impl TableProvider for StandardTableProvider {
317317
filters: &[Expr],
318318
limit: Option<usize>,
319319
) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {
320+
// only for staging in standalone
320321
let mut memory_exec = None;
322+
// TODO: remove
321323
let mut cache_exec = None;
324+
// only for data in hottier
322325
let mut hot_tier_exec = None;
326+
// for s3, because older data might not have manifest
323327
let mut listing_exec = None;
328+
// takes the object storage registry and gets the store from the url
324329
let object_store = state
325330
.runtime_env()
326331
.object_store_registry
327332
.get_store(&self.url)
328333
.unwrap();
334+
// gets the configured storage from the parseable setup
329335
let glob_storage = CONFIG.storage().get_object_store();
330336

337+
// Figures out .stream.json
331338
let object_store_format = glob_storage
332339
.get_object_store_format(&self.stream)
333340
.await
@@ -338,6 +345,7 @@ impl TableProvider for StandardTableProvider {
338345
return Err(DataFusionError::Plan("potentially unbounded query on time range. Table scanning requires atleast one time bound".to_string()));
339346
}
340347

348+
// Only query staging when provided time range includes now
341349
if include_now(filters, time_partition.clone()) {
342350
if let Some(records) =
343351
event::STREAM_WRITERS.recordbatches_cloned(&self.stream, &self.schema)
@@ -350,6 +358,7 @@ impl TableProvider for StandardTableProvider {
350358
);
351359
}
352360
};
361+
// Create a snapshot that contains all fields from different nodes
353362
let mut merged_snapshot: snapshot::Snapshot = Snapshot::default();
354363
if CONFIG.parseable.mode == Mode::Query {
355364
let path = RelativePathBuf::from_iter([&self.stream, STREAM_ROOT_DIRECTORY]);
@@ -377,6 +386,7 @@ impl TableProvider for StandardTableProvider {
377386

378387
// Check whether the query time range overlaps with older data.
379388
// if true, then get listing table time filters and execution plan separately
389+
// BUG: unnecessary listings on dates where there is no data
380390
if is_overlapping_query(&merged_snapshot.manifest_list, &time_filters) {
381391
let listing_time_fiters =
382392
return_listing_time_filters(&merged_snapshot.manifest_list, &mut time_filters);
@@ -614,7 +624,7 @@ async fn get_hottier_exectuion_plan(
614624
#[allow(clippy::too_many_arguments)]
615625
async fn legacy_listing_table(
616626
stream: String,
617-
glob_storage: Arc<dyn ObjectStorage + Send>,
627+
glob_storage: Arc<dyn ObjectStorage>,
618628
object_store: Arc<dyn ObjectStore>,
619629
time_filters: &[PartialTimeFilter],
620630
schema: Arc<Schema>,
@@ -1064,7 +1074,7 @@ mod tests {
10641074
fn datetime_max(year: i32, month: u32, day: u32) -> DateTime<Utc> {
10651075
NaiveDate::from_ymd_opt(year, month, day)
10661076
.unwrap()
1067-
.and_hms_milli_opt(23, 59, 59, 99)
1077+
.and_hms_milli_opt(23, 59, 59, 999)
10681078
.unwrap()
10691079
.and_utc()
10701080
}
File renamed without changes.

server/src/alerts/mod.rs renamed to src/alerts/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ pub mod rule;
3131
pub mod target;
3232

3333
use crate::metrics::ALERTS_STATES;
34+
use crate::option::CONFIG;
3435
use crate::utils::arrow::get_field;
3536
use crate::utils::uid;
36-
use crate::CONFIG;
3737
use crate::{storage, utils};
3838

3939
pub use self::rule::Rule;
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)