From 1dfeab07f3c5e04ba7514109a694cc7de4fb8b35 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 09:36:45 -0400 Subject: [PATCH 01/14] chore(deps): Upgrade Rust to 1.50.0 Signed-off-by: Jesse Szwedko --- rust-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain b/rust-toolchain index 7f3a46a841e5d..5a5c7211dc68e 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.49.0 +1.50.0 From 80af0f7d24d65afd6b50e61bc77cdc796a7893c9 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 09:37:19 -0400 Subject: [PATCH 02/14] Replace deprecated compare_and_swap I believe this is the equivalent compare_exchange based on the table in the docs: Original Success Failure Relaxed Relaxed Relaxed Acquire Acquire Acquire Release Release Relaxed AcqRel AcqRel Acquire SeqCst SeqCst SeqCst https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#migrating-to-compare_exchange-and-compare_exchange_weak Signed-off-by: Jesse Szwedko --- src/internal_events/open.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/internal_events/open.rs b/src/internal_events/open.rs index 111ff5d7ad4f0..d32c38cada1e5 100644 --- a/src/internal_events/open.rs +++ b/src/internal_events/open.rs @@ -69,13 +69,13 @@ fn gauge_add(gauge: &AtomicUsize, add: isize, emitter: impl Fn(usize)) { emitter(new_value); // Try to update gauge to new value and releasing writes to gauge metric in the process. // Otherwise acquire new writes to gauge metric. 
- let latest = gauge.compare_and_swap(value, new_value, Ordering::AcqRel); - if value == latest { + value = match gauge.compare_exchange(value, new_value, Ordering::AcqRel, Ordering::Acquire) + { // Success - break; + Ok(_) => break, + // Try again with new value + Err(v) => v, } - // Try again with new value - value = latest; } // In the worst case scenario we will emit `n^2 / 2` times when there are `n` parallel From cc6e799075c1b55e67de9d23adbb9aa2dc12f08a Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 09:40:43 -0400 Subject: [PATCH 03/14] Collapse patterns Signed-off-by: Jesse Szwedko --- src/sources/journald.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/sources/journald.rs b/src/sources/journald.rs index 3b5f2c65f3b9a..f89d035a7d30c 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -391,18 +391,16 @@ fn create_event(record: Record) -> Event { log.insert(log_schema().host_key(), host); } // Translate the timestamp, and so leave both old and new names. 
- if let Some(timestamp) = log + if let Some(Value::Bytes(timestamp)) = log .get(&*SOURCE_TIMESTAMP) .or_else(|| log.get(RECEIVED_TIMESTAMP)) { - if let Value::Bytes(timestamp) = timestamp { - if let Ok(timestamp) = String::from_utf8_lossy(&timestamp).parse::() { - let timestamp = chrono::Utc.timestamp( - (timestamp / 1_000_000) as i64, - (timestamp % 1_000_000) as u32 * 1_000, - ); - log.insert(log_schema().timestamp_key(), Value::Timestamp(timestamp)); - } + if let Ok(timestamp) = String::from_utf8_lossy(&timestamp).parse::() { + let timestamp = chrono::Utc.timestamp( + (timestamp / 1_000_000) as i64, + (timestamp % 1_000_000) as u32 * 1_000, + ); + log.insert(log_schema().timestamp_key(), Value::Timestamp(timestamp)); } } // Add source type From 40aec9ed5081ef42b9f5097f5634b31aaaf61ee0 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 09:41:43 -0400 Subject: [PATCH 04/14] Prefer contains over find + is_some Signed-off-by: Jesse Szwedko --- src/tls/settings.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tls/settings.rs b/src/tls/settings.rs index c29b02c925ddd..a5b7f51578567 100644 --- a/src/tls/settings.rs +++ b/src/tls/settings.rs @@ -434,7 +434,7 @@ fn der_or_pem(data: Vec, der_fn: impl Fn(Vec) -> T, pem_fn: impl Fn(S /// inline data and is used directly instead of opening a file. 
fn open_read(filename: &Path, note: &'static str) -> Result<(Vec, PathBuf)> { if let Some(filename) = filename.to_str() { - if filename.find(PEM_START_MARKER).is_some() { + if filename.contains(PEM_START_MARKER) { return Ok((Vec::from(filename), "inline text".into())); } } From ef2ebff388a090c4a7a87b248cb4f59b02c18eb3 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 10:09:58 -0400 Subject: [PATCH 05/14] Satisfy clippy::unnecessary_wraps Signed-off-by: Jesse Szwedko --- lib/remap-lang/src/expression/assignment.rs | 3 ++ lib/remap-lang/src/parser.rs | 3 ++ src/sinks/aws_kinesis_firehose.rs | 10 ++--- src/sinks/nats.rs | 10 ++--- src/sinks/papertrail.rs | 9 ++-- src/sinks/util/buffer/metrics.rs | 6 +-- src/sinks/vector.rs | 6 +-- src/sources/aws_ecs_metrics/parser.rs | 48 ++++++++++----------- src/sources/journald.rs | 9 ++-- src/sources/socket/mod.rs | 6 +-- src/sources/socket/unix.rs | 8 ++-- src/sources/syslog.rs | 42 ++++++++---------- src/test_util/stats.rs | 16 +++---- src/transforms/aws_ec2_metadata.rs | 6 +-- 14 files changed, 90 insertions(+), 92 deletions(-) diff --git a/lib/remap-lang/src/expression/assignment.rs b/lib/remap-lang/src/expression/assignment.rs index 504406b805afe..98344d6f77b2a 100644 --- a/lib/remap-lang/src/expression/assignment.rs +++ b/lib/remap-lang/src/expression/assignment.rs @@ -122,6 +122,9 @@ impl Expression for Assignment { fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { let value = self.value.execute(state, object); + // ignoring the unnecessariy wrap as this whole parser is going away momentarily and this + // matches up better with the other *_assignment methods anyway + #[allow(clippy::unnecessary_wraps)] fn var_assignment<'a>( state: &mut state::Program, var: &Variable, diff --git a/lib/remap-lang/src/parser.rs b/lib/remap-lang/src/parser.rs index ec5c709db5a87..d2444c40bdc94 100644 --- a/lib/remap-lang/src/parser.rs +++ b/lib/remap-lang/src/parser.rs @@ -379,6 +379,9 @@ 
impl<'a> Parser<'a> { Ok((start..end, nodes).into()) } + // ignoring the unnecessariy wrap as this whole parser is going away momentarily and this + // matches up better with the other *_from_* methods anyway + #[allow(clippy::unnecessary_wraps)] fn pairs_from_str<'b>(&mut self, rule: R, source: &'b str) -> IResult> { use pest::Parser; diff --git a/src/sinks/aws_kinesis_firehose.rs b/src/sinks/aws_kinesis_firehose.rs index 1529d18434956..2597e4f259eb8 100644 --- a/src/sinks/aws_kinesis_firehose.rs +++ b/src/sinks/aws_kinesis_firehose.rs @@ -162,7 +162,7 @@ impl KinesisFirehoseService { cx.acker(), ) .sink_map_err(|error| error!(message = "Fatal kinesis firehose sink error.", %error)) - .with_flat_map(move |e| stream::iter(encode_event(e, &encoding)).map(Ok)); + .with_flat_map(move |e| stream::iter(Some(encode_event(e, &encoding))).map(Ok)); Ok(sink) } @@ -240,7 +240,7 @@ enum HealthcheckError { StreamNamesMismatch { name: String, stream_name: String }, } -fn encode_event(mut event: Event, encoding: &EncodingConfig) -> Option { +fn encode_event(mut event: Event, encoding: &EncodingConfig) -> Record { encoding.apply_rules(&mut event); let log = event.into_log(); let data = match encoding.codec() { @@ -254,7 +254,7 @@ fn encode_event(mut event: Event, encoding: &EncodingConfig) -> Option let data = Bytes::from(data); - Some(Record { data }) + Record { data } } #[cfg(test)] @@ -270,7 +270,7 @@ mod tests { #[test] fn firehose_encode_event_text() { let message = "hello world".to_string(); - let event = encode_event(message.clone().into(), &Encoding::Text.into()).unwrap(); + let event = encode_event(message.clone().into(), &Encoding::Text.into()); assert_eq!(&event.data[..], message.as_bytes()); } @@ -280,7 +280,7 @@ mod tests { let message = "hello world".to_string(); let mut event = Event::from(message.clone()); event.as_mut_log().insert("key", "value"); - let event = encode_event(event, &Encoding::Json.into()).unwrap(); + let event = encode_event(event, 
&Encoding::Json.into()); let map: BTreeMap = serde_json::from_slice(&event.data[..]).unwrap(); diff --git a/src/sinks/nats.rs b/src/sinks/nats.rs index b8fca85cc82dc..f7adb367b4557 100644 --- a/src/sinks/nats.rs +++ b/src/sinks/nats.rs @@ -85,18 +85,16 @@ impl SinkConfig for NatsSinkConfig { } impl NatsSinkConfig { - fn to_nats_options(&self) -> crate::Result { + fn to_nats_options(&self) -> nats::Options { // Set reconnect_buffer_size on the nats client to 0 bytes so that the // client doesn't buffer internally (to avoid message loss). - let options = nats::Options::new() + nats::Options::new() .with_name(&self.name) - .reconnect_buffer_size(0); - - Ok(options) + .reconnect_buffer_size(0) } async fn connect(&self) -> crate::Result { - self.to_nats_options()? + self.to_nats_options() .connect_async(&self.url) .map_err(|e| e.into()) .await diff --git a/src/sinks/papertrail.rs b/src/sinks/papertrail.rs index 709d912f20e29..71f44815cef82 100644 --- a/src/sinks/papertrail.rs +++ b/src/sinks/papertrail.rs @@ -65,7 +65,7 @@ impl SinkConfig for PapertrailConfig { let sink_config = TcpSinkConfig::new(address, self.keepalive, tls, self.send_buffer_bytes); - sink_config.build(cx, move |event| encode_event(event, pid, &encoding)) + sink_config.build(cx, move |event| Some(encode_event(event, pid, &encoding))) } fn input_type(&self) -> DataType { @@ -77,7 +77,7 @@ impl SinkConfig for PapertrailConfig { } } -fn encode_event(mut event: Event, pid: u32, encoding: &EncodingConfig) -> Option { +fn encode_event(mut event: Event, pid: u32, encoding: &EncodingConfig) -> Bytes { let host = if let Some(host) = event.as_mut_log().remove(log_schema().host_key()) { Some(host.to_string_lossy()) } else { @@ -110,7 +110,7 @@ fn encode_event(mut event: Event, pid: u32, encoding: &EncodingConfig) s.push(b'\n'); - Some(Bytes::from(s)) + Bytes::from(s) } #[cfg(test)] @@ -137,8 +137,7 @@ mod tests { except_fields: Some(vec!["magic".into()]), timestamp_format: None, }, - ) - .unwrap(); + ); let msg 
= bytes.slice(String::from_utf8_lossy(&bytes).find(": ").unwrap() + 2..bytes.len() - 1); diff --git a/src/sinks/util/buffer/metrics.rs b/src/sinks/util/buffer/metrics.rs index 01b3407f837cf..eb274b7ad8704 100644 --- a/src/sinks/util/buffer/metrics.rs +++ b/src/sinks/util/buffer/metrics.rs @@ -265,7 +265,7 @@ impl MetricSet { pub fn make_absolute(&mut self, metric: Metric) -> Option { match metric.data.kind { MetricKind::Absolute => Some(metric), - MetricKind::Incremental => self.incremental_to_absolute(metric), + MetricKind::Incremental => Some(self.incremental_to_absolute(metric)), } } @@ -281,7 +281,7 @@ impl MetricSet { /// Convert the incremental metric into an absolute one, using the /// state buffer to keep track of the value throughout the entire /// application uptime. - fn incremental_to_absolute(&mut self, metric: Metric) -> Option { + fn incremental_to_absolute(&mut self, metric: Metric) -> Metric { let mut entry = MetricEntry(metric.into_absolute()); let mut existing = self.0.take(&entry).unwrap_or_else(|| { // Start from zero value if the entry is not found. 
@@ -290,7 +290,7 @@ impl MetricSet { existing.data.value.add(&entry.data.value); entry.data.value = existing.data.value.clone(); self.0.insert(existing); - Some(entry.0) + entry.0 } /// Convert the absolute metric into an incremental by calculating diff --git a/src/sinks/vector.rs b/src/sinks/vector.rs index 4b352dc009528..54b4def1644ac 100644 --- a/src/sinks/vector.rs +++ b/src/sinks/vector.rs @@ -74,7 +74,7 @@ impl SinkConfig for VectorSinkConfig { self.send_buffer_bytes, ); - sink_config.build(cx, encode_event) + sink_config.build(cx, |event| Some(encode_event(event))) } fn input_type(&self) -> DataType { @@ -92,7 +92,7 @@ enum HealthcheckError { ConnectError { source: std::io::Error }, } -fn encode_event(event: Event) -> Option { +fn encode_event(event: Event) -> Bytes { let event = proto::EventWrapper::from(event); let event_len = event.encoded_len(); let full_len = event_len + 4; @@ -101,7 +101,7 @@ fn encode_event(event: Event) -> Option { out.put_u32(event_len as u32); event.encode(&mut out).unwrap(); - Some(out.into()) + out.into() } #[cfg(test)] diff --git a/src/sources/aws_ecs_metrics/parser.rs b/src/sources/aws_ecs_metrics/parser.rs index e3dc0c0494328..a599b899722c3 100644 --- a/src/sources/aws_ecs_metrics/parser.rs +++ b/src/sources/aws_ecs_metrics/parser.rs @@ -496,8 +496,8 @@ mod test { Utc.ymd(2018, 11, 14).and_hms_nano(8, 9, 10, 11) } - fn namespace() -> Option { - Some("aws_ecs".into()) + fn namespace() -> String { + "aws_ecs".into() } #[test] @@ -535,14 +535,14 @@ mod test { }"##; assert_eq!( - parse(json.as_bytes(), namespace()).unwrap(), + parse(json.as_bytes(), Some(namespace())).unwrap(), vec![ Metric::new( "blkio_recursive_io_service_bytes_total", MetricKind::Absolute, MetricValue::Counter { value: 0.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ("device".into(), "202:26368".into()), @@ -562,7 +562,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 520192.0 }, ) - 
.with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ("device".into(), "202:26368".into()), @@ -613,14 +613,14 @@ mod test { }"##; assert_eq!( - parse(json.as_bytes(), namespace()).unwrap(), + parse(json.as_bytes(), Some(namespace())).unwrap(), vec![ Metric::new( "cpu_online_cpus", MetricKind::Absolute, MetricValue::Gauge { value: 2.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -640,7 +640,7 @@ mod test { value: 2007130000000.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -658,7 +658,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 510000000.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -676,7 +676,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 190000000.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -696,7 +696,7 @@ mod test { value: 2324920942.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -714,7 +714,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 0.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -732,7 +732,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 0.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -750,7 +750,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 0.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -770,7 +770,7 @@ mod test { value: 1095931487.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ("cpu".into(), "0".into()), @@ -791,7 +791,7 @@ mod test { value: 1228989455.0 }, ) - .with_namespace(namespace()) + 
.with_namespace(Some(namespace())) .with_tags(Some( vec![ ("cpu".into(), "1".into()), @@ -859,7 +859,7 @@ mod test { } }"##; - let metrics = parse(json.as_bytes(), namespace()).unwrap(); + let metrics = parse(json.as_bytes(), Some(namespace())).unwrap(); assert_eq!( metrics @@ -871,7 +871,7 @@ mod test { MetricKind::Absolute, MetricValue::Gauge { value: 40120320.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -896,7 +896,7 @@ mod test { MetricKind::Absolute, MetricValue::Gauge { value: 47177728.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -921,7 +921,7 @@ mod test { MetricKind::Absolute, MetricValue::Gauge { value: 34885632.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -946,7 +946,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 31131.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ( @@ -985,7 +985,7 @@ mod test { } }"##; - let metrics = parse(json.as_bytes(), namespace()).unwrap(); + let metrics = parse(json.as_bytes(), Some(namespace())).unwrap(); assert_eq!( metrics @@ -997,7 +997,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 329932716.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ("device".into(), "eth1".into()), @@ -1023,7 +1023,7 @@ mod test { MetricKind::Absolute, MetricValue::Counter { value: 2001229.0 }, ) - .with_namespace(namespace()) + .with_namespace(Some(namespace())) .with_tags(Some( vec![ ("device".into(), "eth1".into()), diff --git a/src/sources/journald.rs b/src/sources/journald.rs index f89d035a7d30c..e8e10a99e8c53 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -627,7 +627,7 @@ mod tests { impl FakeJournal { fn new( checkpoint: &Option, - ) -> crate::Result<(BoxStream<'static, io::Result>, StopJournalctlFn)> { + ) -> 
(BoxStream<'static, io::Result>, StopJournalctlFn) { let cursor = Cursor::new(FAKE_JOURNAL); let reader = BufReader::new(cursor); let mut journal = FakeJournal { reader }; @@ -640,7 +640,7 @@ mod tests { } } - Ok((Box::pin(journal), Box::new(|| ()))) + (Box::pin(journal), Box::new(|| ())) } } @@ -674,7 +674,10 @@ mod tests { remap_priority: true, out: tx, } - .run_shutdown(shutdown, Box::new(FakeJournal::new)); + .run_shutdown( + shutdown, + Box::new(|checkpoint| Ok(FakeJournal::new(checkpoint))), + ); tokio::spawn(source); delay_for(Duration::from_millis(100)).await; diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index a3495425ba5bf..cf63043398794 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -851,7 +851,7 @@ mod test { #[test] fn parses_unix_datagram_config() { let config = parses_unix_config("unix_datagram"); - assert!(matches!(config.mode,Mode::UnixDatagram { .. })); + assert!(matches!(config.mode, Mode::UnixDatagram { .. })); } ////////////// UNIX STREAM TESTS ////////////// @@ -890,13 +890,13 @@ mod test { #[test] fn parses_new_unix_stream_config() { let config = parses_unix_config("unix_stream"); - assert!(matches!(config.mode,Mode::UnixStream { .. })); + assert!(matches!(config.mode, Mode::UnixStream { .. })); } #[cfg(unix)] #[test] fn parses_old_unix_stream_config() { let config = parses_unix_config("unix"); - assert!(matches!(config.mode,Mode::UnixStream { .. })); + assert!(matches!(config.mode, Mode::UnixStream { .. })); } } diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index cb2489a04017d..5525fa4a2a074 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -40,7 +40,7 @@ impl UnixConfig { * Function to pass to build_unix_*_source, specific to the basic unix source. * Takes a single line of a received message and builds an Event object. 
**/ -fn build_event(host_key: &str, received_from: Option, line: &str) -> Option { +fn build_event(host_key: &str, received_from: Option, line: &str) -> Event { let byte_size = line.len(); let mut event = Event::from(line); event.as_mut_log().insert( @@ -54,7 +54,7 @@ fn build_event(host_key: &str, received_from: Option, line: &str) -> Opti byte_size, mode: SocketMode::Unix }); - Some(event) + event } pub(super) fn unix_datagram( @@ -71,7 +71,7 @@ pub(super) fn unix_datagram( LinesCodec::new_with_max_length(max_length), shutdown, out, - build_event, + |host_key, received_from, line| Some(build_event(host_key, received_from, line)), ) } @@ -88,6 +88,6 @@ pub(super) fn unix_stream( host_key, shutdown, out, - build_event, + |host_key, received_from, line| Some(build_event(host_key, received_from, line)), ) } diff --git a/src/sources/syslog.rs b/src/sources/syslog.rs index 73e7a7c220022..5b829b78ad102 100644 --- a/src/sources/syslog.rs +++ b/src/sources/syslog.rs @@ -154,7 +154,7 @@ impl SourceConfig for SyslogConfig { host_key, shutdown, out, - event_from_str, + |host_key, default_host, line| Some(event_from_str(host_key, default_host, line)), )), } } @@ -192,7 +192,7 @@ impl TcpSource for SyslogTcpSource { } fn build_event(&self, frame: String, host: Bytes) -> Option { - event_from_str(&self.host_key, Some(host), &frame) + Some(event_from_str(&self.host_key, Some(host), &frame)) } } @@ -336,9 +336,7 @@ pub fn udp( std::str::from_utf8(&bytes) .map_err(|error| emit!(SyslogUdpUtf8Error { error })) .ok() - .and_then(|s| { - event_from_str(&host_key, Some(received_from), s).map(Ok) - }) + .map(|s| Ok(event_from_str(&host_key, Some(received_from), s))) } Err(error) => { emit!(SyslogUdpReadError { error }); @@ -374,7 +372,7 @@ fn resolve_year((month, _date, _hour, _min, _sec): IncompleteDate) -> i32 { // TODO: many more cases to handle: // octet framing (i.e. 
num bytes as ascii string prefix) with and without delimiters // null byte delimiter in place of newline -fn event_from_str(host_key: &str, default_host: Option, line: &str) -> Option { +fn event_from_str(host_key: &str, default_host: Option, line: &str) -> Event { let line = line.trim(); let parsed = syslog_loose::parse_message_with_year(line, resolve_year); let mut event = Event::from(&parsed.msg[..]); @@ -412,7 +410,7 @@ fn event_from_str(host_key: &str, default_host: Option, line: &str) -> Op event = ?event ); - Some(event) + event } fn insert_fields_from_syslog(event: &mut Event, parsed: Message<&str>) { @@ -622,10 +620,7 @@ mod test { expected.insert("procid", 8449); } - assert_eq!( - event_from_str(&"host".to_string(), None, &raw).unwrap(), - expected - ); + assert_eq!(event_from_str(&"host".to_string(), None, &raw), expected); } #[test] @@ -654,7 +649,7 @@ mod test { } let event = event_from_str(&"host".to_string(), None, &raw); - assert_eq!(event, Some(expected.clone())); + assert_eq!(event, expected); let raw = format!( r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - {} {}"#, @@ -662,7 +657,7 @@ mod test { ); let event = event_from_str(&"host".to_string(), None, &raw); - assert_eq!(event, Some(expected)); + assert_eq!(event, expected); } #[test] @@ -680,7 +675,7 @@ mod test { r#"[empty]"# ); - let event = event_from_str(&"host".to_string(), None, &msg).unwrap(); + let event = event_from_str(&"host".to_string(), None, &msg); assert!(there_is_map_called_empty(event)); let msg = format!( @@ -688,7 +683,7 @@ mod test { r#"[non_empty x="1"][empty]"# ); - let event = event_from_str(&"host".to_string(), None, &msg).unwrap(); + let event = event_from_str(&"host".to_string(), None, &msg); assert!(there_is_map_called_empty(event)); let msg = format!( @@ -696,7 +691,7 @@ mod test { r#"[empty][non_empty x="1"]"# ); - let event = event_from_str(&"host".to_string(), None, &msg).unwrap(); + let event = event_from_str(&"host".to_string(), None, &msg); 
assert!(there_is_map_called_empty(event)); let msg = format!( @@ -704,7 +699,7 @@ mod test { r#"[empty not_really="testing the test"]"# ); - let event = event_from_str(&"host".to_string(), None, &msg).unwrap(); + let event = event_from_str(&"host".to_string(), None, &msg); assert!(!there_is_map_called_empty(event)); } @@ -717,8 +712,8 @@ mod test { let cleaned = r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - [meta sequenceId="1"] i am foobar"#; assert_eq!( - event_from_str(&"host".to_string(), None, raw).unwrap(), - event_from_str(&"host".to_string(), None, cleaned).unwrap() + event_from_str(&"host".to_string(), None, raw), + event_from_str(&"host".to_string(), None, cleaned) ); } @@ -726,7 +721,7 @@ mod test { fn syslog_ng_default_network() { let msg = "i am foobar"; let raw = format!(r#"<13>Feb 13 20:07:26 74794bfb6795 root[8539]: {}"#, msg); - let event = event_from_str(&"host".to_string(), None, &raw).unwrap(); + let event = event_from_str(&"host".to_string(), None, &raw); let mut expected = Event::from(msg); { @@ -756,7 +751,7 @@ mod test { r#"<190>Feb 13 21:31:56 74794bfb6795 liblogging-stdlog: [origin software="rsyslogd" swVersion="8.24.0" x-pid="8979" x-info="http://www.rsyslog.com"] {}"#, msg ); - let event = event_from_str(&"host".to_string(), None, &raw).unwrap(); + let event = event_from_str(&"host".to_string(), None, &raw); let mut expected = Event::from(msg); { @@ -811,9 +806,6 @@ mod test { expected.insert("origin.x-info", "http://www.rsyslog.com"); } - assert_eq!( - event_from_str(&"host".to_string(), None, &raw).unwrap(), - expected - ); + assert_eq!(event_from_str(&"host".to_string(), None, &raw), expected); } } diff --git a/src/test_util/stats.rs b/src/test_util/stats.rs index c50e6125887f7..d7dbbab9c9ff0 100644 --- a/src/test_util/stats.rs +++ b/src/test_util/stats.rs @@ -163,8 +163,8 @@ impl WeightedSum { pub fn add(&mut self, value: f64, weight: f64) { self.total += value * weight; self.weights += weight; - self.max = 
opt_max(self.max, value); - self.min = opt_min(self.min, value); + self.max = Some(opt_max(self.max, value)); + self.min = Some(opt_min(self.min, value)); } pub fn mean(&self) -> Option { @@ -197,20 +197,20 @@ impl Display for WeightedSum { } } -fn opt_max(opt: Option, value: f64) -> Option { - Some(match opt { +fn opt_max(opt: Option, value: f64) -> f64 { + match opt { None => value, Some(s) if s > value => s, _ => value, - }) + } } -fn opt_min(opt: Option, value: f64) -> Option { - Some(match opt { +fn opt_min(opt: Option, value: f64) -> f64 { + match opt { None => value, Some(s) if s < value => s, _ => value, - }) + } } /// A TimeWeightedSum is a wrapper around WeightedSum that keeps track diff --git a/src/transforms/aws_ec2_metadata.rs b/src/transforms/aws_ec2_metadata.rs index e6d597eb8f45d..d16494e6e1781 100644 --- a/src/transforms/aws_ec2_metadata.rs +++ b/src/transforms/aws_ec2_metadata.rs @@ -180,12 +180,12 @@ impl TaskTransform for Ec2MetadataTransform { Self: 'static, { let mut inner = self; - Box::pin(task.filter_map(move |event| ready(inner.transform_one(event)))) + Box::pin(task.filter_map(move |event| ready(Some(inner.transform_one(event))))) } } impl Ec2MetadataTransform { - fn transform_one(&mut self, mut event: Event) -> Option { + fn transform_one(&mut self, mut event: Event) -> Event { let log = event.as_mut_log(); if let Some(read_ref) = self.state.read() { @@ -196,7 +196,7 @@ impl Ec2MetadataTransform { }); } - Some(event) + event } } From dbd61aef1cdfdca65704c07b5b6df03d56d4f8e2 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 10:28:19 -0400 Subject: [PATCH 06/14] Satisfy clippy::wrong-self-convention Signed-off-by: Jesse Szwedko --- src/api/schema/components/sink.rs | 2 +- src/api/schema/components/source.rs | 2 +- src/api/schema/components/transform.rs | 2 +- src/api/schema/metrics/sink/mod.rs | 4 ++-- src/api/schema/metrics/source/mod.rs | 4 ++-- src/api/schema/metrics/transform/mod.rs | 4 ++-- 6 files changed, 9 
insertions(+), 9 deletions(-) diff --git a/src/api/schema/components/sink.rs b/src/api/schema/components/sink.rs index a71b096ed6eab..f03ffa7b205e9 100644 --- a/src/api/schema/components/sink.rs +++ b/src/api/schema/components/sink.rs @@ -109,7 +109,7 @@ impl Sink { /// Sink metrics pub async fn metrics(&self) -> metrics::SinkMetrics { - metrics::by_component_name(self.get_name()).to_sink_metrics(self.get_component_type()) + metrics::by_component_name(self.get_name()).into_sink_metrics(self.get_component_type()) } } diff --git a/src/api/schema/components/source.rs b/src/api/schema/components/source.rs index 9594860300b68..2daedaf483fba 100644 --- a/src/api/schema/components/source.rs +++ b/src/api/schema/components/source.rs @@ -107,7 +107,7 @@ impl Source { /// Source metrics pub async fn metrics(&self) -> metrics::SourceMetrics { - metrics::by_component_name(&self.get_name()).to_source_metrics(&self.get_component_type()) + metrics::by_component_name(&self.get_name()).into_source_metrics(&self.get_component_type()) } } diff --git a/src/api/schema/components/transform.rs b/src/api/schema/components/transform.rs index c42abad9a91cc..8ce4f118e8159 100644 --- a/src/api/schema/components/transform.rs +++ b/src/api/schema/components/transform.rs @@ -88,7 +88,7 @@ impl Transform { /// Transform metrics pub async fn metrics(&self) -> metrics::TransformMetrics { - metrics::by_component_name(&self.0.name).to_transform_metrics(&self.get_component_type()) + metrics::by_component_name(&self.0.name).into_transform_metrics(&self.get_component_type()) } } diff --git a/src/api/schema/metrics/sink/mod.rs b/src/api/schema/metrics/sink/mod.rs index 394d12d4ac4e0..32863dde38a45 100644 --- a/src/api/schema/metrics/sink/mod.rs +++ b/src/api/schema/metrics/sink/mod.rs @@ -14,11 +14,11 @@ pub enum SinkMetrics { } pub trait IntoSinkMetrics { - fn to_sink_metrics(self, component_type: &str) -> SinkMetrics; + fn into_sink_metrics(self, component_type: &str) -> SinkMetrics; } impl 
IntoSinkMetrics for Vec { - fn to_sink_metrics(self, _component_type: &str) -> SinkMetrics { + fn into_sink_metrics(self, _component_type: &str) -> SinkMetrics { SinkMetrics::GenericSinkMetrics(generic::GenericSinkMetrics::new(self)) } } diff --git a/src/api/schema/metrics/source/mod.rs b/src/api/schema/metrics/source/mod.rs index 43c7fa4c9eb6d..15263ca6cf964 100644 --- a/src/api/schema/metrics/source/mod.rs +++ b/src/api/schema/metrics/source/mod.rs @@ -16,11 +16,11 @@ pub enum SourceMetrics { } pub trait IntoSourceMetrics { - fn to_source_metrics(self, component_type: &str) -> SourceMetrics; + fn into_source_metrics(self, component_type: &str) -> SourceMetrics; } impl IntoSourceMetrics for Vec { - fn to_source_metrics(self, component_type: &str) -> SourceMetrics { + fn into_source_metrics(self, component_type: &str) -> SourceMetrics { match component_type { "file" => SourceMetrics::FileSourceMetrics(file::FileSourceMetrics::new(self)), _ => SourceMetrics::GenericSourceMetrics(generic::GenericSourceMetrics::new(self)), diff --git a/src/api/schema/metrics/transform/mod.rs b/src/api/schema/metrics/transform/mod.rs index 09ea49dab26c4..149e384e7e505 100644 --- a/src/api/schema/metrics/transform/mod.rs +++ b/src/api/schema/metrics/transform/mod.rs @@ -14,11 +14,11 @@ pub enum TransformMetrics { } pub trait IntoTransformMetrics { - fn to_transform_metrics(self, component_type: &str) -> TransformMetrics; + fn into_transform_metrics(self, component_type: &str) -> TransformMetrics; } impl IntoTransformMetrics for Vec { - fn to_transform_metrics(self, _component_type: &str) -> TransformMetrics { + fn into_transform_metrics(self, _component_type: &str) -> TransformMetrics { TransformMetrics::GenericTransformMetrics(generic::GenericTransformMetrics::new(self)) } } From 8c383de51bab38b7d805981164fbb57ee6e0fc62 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 11:04:56 -0400 Subject: [PATCH 07/14] A couple more collapsible_matchs in a file I expect to go away 
Signed-off-by: Jesse Szwedko --- src/mapping/query/function/split.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/mapping/query/function/split.rs b/src/mapping/query/function/split.rs index cea5e41ef1c27..3bc9e7ab65153 100644 --- a/src/mapping/query/function/split.rs +++ b/src/mapping/query/function/split.rs @@ -23,6 +23,7 @@ impl SplitFn { } impl Function for SplitFn { + #[allow(clippy::collapsible_match)] // I expect this file to be going away shortly fn execute(&self, ctx: &Event) -> Result { let string = { let bytes = required_value!(ctx, self.path, Value::Bytes(v) => v); @@ -71,10 +72,7 @@ impl Function for SplitFn { }, Parameter { keyword: "pattern", - accepts: |v| { - matches!(v, QueryValue::Value(Value::Bytes(_)) - | QueryValue::Regex(_)) - }, + accepts: |v| matches!(v, QueryValue::Value(Value::Bytes(_)) | QueryValue::Regex(_)), required: true, }, Parameter { From 5a899247e82df149439b44c7512b9f64487ee58f Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 14:01:36 -0400 Subject: [PATCH 08/14] cargo fmt Signed-off-by: Jesse Szwedko --- lib/prometheus-parser/src/lib.rs | 18 +++++--- lib/prometheus-parser/src/line.rs | 5 +- lib/remap-functions/src/to_int.rs | 12 ++++- lib/remap-functions/src/to_timestamp.rs | 5 +- src/kubernetes/stream.rs | 14 +++++- src/rusoto/auth.rs | 4 +- src/sinks/aws_s3.rs | 2 +- src/sinks/http.rs | 2 +- src/sinks/util/adaptive_concurrency/tests.rs | 48 +++++++++++++------- src/sources/prometheus/scrape.rs | 2 +- 10 files changed, 74 insertions(+), 38 deletions(-) diff --git a/lib/prometheus-parser/src/lib.rs b/lib/prometheus-parser/src/lib.rs index 056c64d4d2184..caa4c2dc4b4e0 100644 --- a/lib/prometheus-parser/src/lib.rs +++ b/lib/prometheus-parser/src/lib.rs @@ -319,7 +319,8 @@ mod test { assert!(matches!( error, ParserError::WithLine { - kind: ErrorKind::ExpectedChar { expected: ',', .. }, .. + kind: ErrorKind::ExpectedChar { expected: ',', .. }, + .. 
} )); @@ -329,7 +330,8 @@ mod test { assert!(matches!( error, ParserError::WithLine { - kind: ErrorKind::InvalidMetricKind { .. }, .. + kind: ErrorKind::InvalidMetricKind { .. }, + .. } )); @@ -339,7 +341,8 @@ mod test { assert!(matches!( error, ParserError::WithLine { - kind: ErrorKind::ExpectedSpace { .. }, .. + kind: ErrorKind::ExpectedSpace { .. }, + .. } )); @@ -349,7 +352,8 @@ mod test { assert!(matches!( error, ParserError::WithLine { - kind: ErrorKind::ExpectedChar { expected: '"', .. }, .. + kind: ErrorKind::ExpectedChar { expected: '"', .. }, + .. } )); @@ -359,7 +363,8 @@ mod test { assert!(matches!( error, ParserError::WithLine { - kind: ErrorKind::ExpectedChar { expected: '"', .. }, .. + kind: ErrorKind::ExpectedChar { expected: '"', .. }, + .. } )); @@ -369,7 +374,8 @@ mod test { assert!(matches!( error, ParserError::WithLine { - kind: ErrorKind::ParseFloatError { .. }, .. + kind: ErrorKind::ParseFloatError { .. }, + .. } )); } diff --git a/lib/prometheus-parser/src/line.rs b/lib/prometheus-parser/src/line.rs index e8cbf68c5412b..058e68d5f5b98 100644 --- a/lib/prometheus-parser/src/line.rs +++ b/lib/prometheus-parser/src/line.rs @@ -611,10 +611,7 @@ mod test { let input = wrap(r#"{ a="b" ,, c="d" }"#); let error = Metric::parse_labels(&input).unwrap_err().into(); println!("{}", error); - assert!(matches!( - error, - ErrorKind::ParseNameError { .. } - )); + assert!(matches!(error, ErrorKind::ParseNameError { .. 
})); } #[test] diff --git a/lib/remap-functions/src/to_int.rs b/lib/remap-functions/src/to_int.rs index ae688dd045fc4..de099d4b1f029 100644 --- a/lib/remap-functions/src/to_int.rs +++ b/lib/remap-functions/src/to_int.rs @@ -12,7 +12,17 @@ impl Function for ToInt { fn parameters(&self) -> &'static [Parameter] { &[Parameter { keyword: "value", - accepts: |v| matches!(v, Value::Integer(_) | Value::Float(_) | Value::Bytes(_) | Value::Boolean(_) | Value::Timestamp(_) | Value::Null), + accepts: |v| { + matches!( + v, + Value::Integer(_) + | Value::Float(_) + | Value::Bytes(_) + | Value::Boolean(_) + | Value::Timestamp(_) + | Value::Null + ) + }, required: true, }] } diff --git a/lib/remap-functions/src/to_timestamp.rs b/lib/remap-functions/src/to_timestamp.rs index a166eee64848f..fa8ed9ef35a4a 100644 --- a/lib/remap-functions/src/to_timestamp.rs +++ b/lib/remap-functions/src/to_timestamp.rs @@ -16,10 +16,7 @@ impl Function for ToTimestamp { accepts: |v| { matches!( v, - Value::Integer(_) | - Value::Float(_) | - Value::Bytes(_) | - Value::Timestamp(_) + Value::Integer(_) | Value::Float(_) | Value::Bytes(_) | Value::Timestamp(_) ) }, required: true, diff --git a/src/kubernetes/stream.rs b/src/kubernetes/stream.rs index 4974e8eded33c..319f49673ea63 100644 --- a/src/kubernetes/stream.rs +++ b/src/kubernetes/stream.rs @@ -121,7 +121,12 @@ mod tests { { let err = out_stream.next().await.unwrap().unwrap_err(); - assert!(matches!(err, Error::Reading { source: hyper::Error { .. } })); + assert!(matches!( + err, + Error::Reading { + source: hyper::Error { .. 
} + } + )); } assert!(out_stream.next().await.is_none()); @@ -139,7 +144,12 @@ mod tests { { let err = out_stream.next().await.unwrap().unwrap_err(); - assert!(matches!(err, Error::Parsing { source: response::Error::Json(_) })); + assert!(matches!( + err, + Error::Parsing { + source: response::Error::Json(_) + } + )); } assert!(out_stream.next().await.is_none()); diff --git a/src/rusoto/auth.rs b/src/rusoto/auth.rs index 66e17172f7785..7c6bba031cd4b 100644 --- a/src/rusoto/auth.rs +++ b/src/rusoto/auth.rs @@ -101,7 +101,7 @@ mod tests { ) .unwrap(); - assert!(matches!(config.auth, AWSAuthentication::Role{..})); + assert!(matches!(config.auth, AWSAuthentication::Role { .. })); } #[test] @@ -130,6 +130,6 @@ mod tests { ) .unwrap(); - assert!(matches!(config.auth, AWSAuthentication::Static{..})); + assert!(matches!(config.auth, AWSAuthentication::Static { .. })); } } diff --git a/src/sinks/aws_s3.rs b/src/sinks/aws_s3.rs index d16707314c9dc..68ecfbc5ea3cf 100644 --- a/src/sinks/aws_s3.rs +++ b/src/sinks/aws_s3.rs @@ -701,7 +701,7 @@ mod integration_tests { assert_downcast_matches!( config.healthcheck(client).await.unwrap_err(), HealthcheckError, - HealthcheckError::UnknownBucket{ .. } + HealthcheckError::UnknownBucket { .. } ); } diff --git a/src/sinks/http.rs b/src/sinks/http.rs index 3ba28c73d6436..1adeefb78bd7c 100644 --- a/src/sinks/http.rs +++ b/src/sinks/http.rs @@ -376,7 +376,7 @@ mod tests { assert_downcast_matches!( super::validate_headers(&config.request.headers, &None).unwrap_err(), BuildError, - BuildError::InvalidHeaderName{..} + BuildError::InvalidHeaderName { .. 
} ); } diff --git a/src/sinks/util/adaptive_concurrency/tests.rs b/src/sinks/util/adaptive_concurrency/tests.rs index 4ec6c743d8e25..ec5d465d05174 100644 --- a/src/sinks/util/adaptive_concurrency/tests.rs +++ b/src/sinks/util/adaptive_concurrency/tests.rs @@ -406,24 +406,40 @@ async fn run_test(params: TestParams) -> TestResults { .map(|event| (event.name().to_string(), event)) .collect::>(); // Ensure basic statistics are captured, don't actually examine them - assert!( - matches!(metrics.get("adaptive_concurrency_observed_rtt").unwrap().data.value, - MetricValue::Distribution { .. }) - ); - assert!( - matches!(metrics.get("adaptive_concurrency_averaged_rtt").unwrap().data.value, - MetricValue::Distribution { .. }) - ); + assert!(matches!( + metrics + .get("adaptive_concurrency_observed_rtt") + .unwrap() + .data + .value, + MetricValue::Distribution { .. } + )); + assert!(matches!( + metrics + .get("adaptive_concurrency_averaged_rtt") + .unwrap() + .data + .value, + MetricValue::Distribution { .. } + )); if params.concurrency == Concurrency::Adaptive { - assert!( - matches!(metrics.get("adaptive_concurrency_limit").unwrap().data.value, - MetricValue::Distribution { .. }) - ); + assert!(matches!( + metrics + .get("adaptive_concurrency_limit") + .unwrap() + .data + .value, + MetricValue::Distribution { .. } + )); } - assert!( - matches!(metrics.get("adaptive_concurrency_in_flight").unwrap().data.value, - MetricValue::Distribution { .. }) - ); + assert!(matches!( + metrics + .get("adaptive_concurrency_in_flight") + .unwrap() + .data + .value, + MetricValue::Distribution { .. 
} + )); TestResults { stats, cstats } } diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 597abf5b748a3..2f9b62035e18b 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -435,7 +435,7 @@ mod integration_tests { // Sample some well-known metrics let build = find_metric("prometheus_build_info"); assert!(matches!(build.data.kind, MetricKind::Absolute)); - assert!(matches!(build.data.value, MetricValue::Gauge { ..})); + assert!(matches!(build.data.value, MetricValue::Gauge { .. })); assert!(build.tags().unwrap().contains_key("branch")); assert!(build.tags().unwrap().contains_key("version")); From 680e19930cb2e3c04b93a1d9615ed819e6d1ad8c Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 15:28:32 -0400 Subject: [PATCH 09/14] More needless_wraps Signed-off-by: Jesse Szwedko --- lib/remap-functions/src/parse_aws_alb_log.rs | 12 ++++++------ ...e_aws_cloudwatch_log_subscription_message.rs | 10 +++++----- .../src/parse_aws_vpc_flow_log.rs | 17 +++++++++-------- lib/remap-functions/src/parse_common_log.rs | 16 ++++++++-------- lib/remap-functions/src/parse_glog.rs | 14 +++++++------- lib/remap-functions/src/parse_syslog.rs | 14 +++++++------- lib/remap-functions/src/parse_url.rs | 12 ++++++------ 7 files changed, 48 insertions(+), 47 deletions(-) diff --git a/lib/remap-functions/src/parse_aws_alb_log.rs b/lib/remap-functions/src/parse_aws_alb_log.rs index 1a92475090cb0..57570c0168a74 100644 --- a/lib/remap-functions/src/parse_aws_alb_log.rs +++ b/lib/remap-functions/src/parse_aws_alb_log.rs @@ -55,14 +55,14 @@ impl Expression for ParseAwsAlbLogFn { self.value .type_def(state) .into_fallible(true) // Log parsing error - .with_inner_type(inner_type_def()) + .with_inner_type(Some(inner_type_def())) .with_constraint(value::Kind::Map) } } /// The type defs of the fields contained by the returned map. -fn inner_type_def() -> Option { - Some(inner_type_def! 
({ +fn inner_type_def() -> InnerTypeDef { + inner_type_def! ({ "type": Kind::Bytes, "timestamp": Kind::Bytes, "elb": Kind::Bytes, @@ -94,7 +94,7 @@ fn inner_type_def() -> Option { "target_status_code_list": Kind::Bytes, "classification": Kind::Bytes, "classification_reason": Kind::Bytes - })) + }) } fn parse_log(mut input: &str) -> Result { @@ -240,12 +240,12 @@ mod tests { remap::test_type_def![ value_string { expr: |_| ParseAwsAlbLogFn { value: Literal::from("foo").boxed() }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } value_optional { expr: |_| ParseAwsAlbLogFn { value: Box::new(Noop) }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } ]; diff --git a/lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs b/lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs index e48de2568bb6a..fd889b9c8cced 100644 --- a/lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs +++ b/lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs @@ -58,14 +58,14 @@ impl Expression for ParseAwsCloudWatchLogSubscriptionMessageFn { self.value .type_def(state) .into_fallible(true) // Message parsing error - .with_inner_type(inner_type_def()) + .with_inner_type(Some(inner_type_def())) .with_constraint(value::Kind::Map) } } /// The type defs of the fields contained by the returned map. -fn inner_type_def() -> Option { - Some(inner_type_def! ({ +fn inner_type_def() -> InnerTypeDef { + inner_type_def! 
({ "owner": Kind::Bytes, "message_type": Kind::Bytes, "log_group": Kind::Bytes, @@ -78,7 +78,7 @@ fn inner_type_def() -> Option { "timestamp": Kind::Timestamp, "message": Kind::Bytes, }))) - })) + }) } #[cfg(test)] @@ -150,7 +150,7 @@ mod tests { def: TypeDef { fallible: true, kind: Kind::Map, - inner_type_def: inner_type_def(), + inner_type_def: Some(inner_type_def()), }, }]; } diff --git a/lib/remap-functions/src/parse_aws_vpc_flow_log.rs b/lib/remap-functions/src/parse_aws_vpc_flow_log.rs index 40d50bdc9a1b9..25f38b9a8cde4 100644 --- a/lib/remap-functions/src/parse_aws_vpc_flow_log.rs +++ b/lib/remap-functions/src/parse_aws_vpc_flow_log.rs @@ -64,14 +64,14 @@ impl Expression for ParseAwsVpcFlowLogFn { self.value .type_def(state) .into_fallible(true) // Log parsin_ error - .with_inner_type(inner_type_def()) + .with_inner_type(Some(inner_type_def())) .with_constraint(value::Kind::Map) } } /// The type defs of the fields contained by the returned map. -fn inner_type_def() -> Option { - Some(inner_type_def! ({ +fn inner_type_def() -> InnerTypeDef { + inner_type_def! 
({ "version": Kind::Integer | Kind::Null, "account_id": Kind::Integer | Kind::Null, "interface_id": Kind::Bytes | Kind::Null, @@ -96,11 +96,12 @@ fn inner_type_def() -> Option { "region": Kind::Bytes | Kind::Null, "az_id": Kind::Bytes | Kind::Null, "sublocation_type": Kind::Bytes | Kind::Null, - })) + }) } type ParseResult = std::result::Result; +#[allow(clippy::unnecessary_wraps)] // match other parse methods fn identity<'a>(_key: &'a str, value: &'a str) -> ParseResult<&'a str> { Ok(value) } @@ -185,22 +186,22 @@ mod tests { remap::test_type_def![ value_noop { expr: |_| ParseAwsVpcFlowLogFn::new(Box::new(Noop), None), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, } value_non_string { expr: |_| ParseAwsVpcFlowLogFn::new(Literal::from(1).boxed(), None), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, } value_string { expr: |_| ParseAwsVpcFlowLogFn::new(Literal::from("foo").boxed(), None), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, } format_non_string { expr: |_| ParseAwsVpcFlowLogFn::new(Literal::from("foo").boxed(), Some(Literal::from(1).boxed())), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, } ]; diff --git a/lib/remap-functions/src/parse_common_log.rs b/lib/remap-functions/src/parse_common_log.rs index a85b126a0f93c..620535418f862 100644 --- a/lib/remap-functions/src/parse_common_log.rs +++ b/lib/remap-functions/src/parse_common_log.rs @@ -153,14 +153,14 @@ impl Expression for ParseCommonLogFn { .type_def(state) .into_fallible(true) 
.with_constraint(value::Kind::Map) - .with_inner_type(inner_type_def()) + .with_inner_type(Some(inner_type_def())) } } -fn inner_type_def() -> Option { +fn inner_type_def() -> InnerTypeDef { use value::Kind; - Some(inner_type_def!({ + inner_type_def!({ "host": Kind::Bytes | Kind::Null, "identity": Kind::Bytes | Kind::Null, "user": Kind::Bytes | Kind::Null, @@ -171,7 +171,7 @@ fn inner_type_def() -> Option { "protocol": Kind::Bytes | Kind::Null, "status": Kind::Integer | Kind::Null, "size": Kind::Integer | Kind::Null, - })) + }) } #[cfg(test)] @@ -242,22 +242,22 @@ mod tests { test_type_def![ value_string { expr: |_| ParseCommonLogFn { value: Literal::from("foo").boxed(), timestamp_format: None }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } value_non_string { expr: |_| ParseCommonLogFn { value: Literal::from(1).boxed(), timestamp_format: None }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } timestamp_format_string { expr: |_| ParseCommonLogFn { value: Literal::from("foo").boxed(), timestamp_format: Some(Literal::from("foo").boxed()) }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } timestamp_format_non_string { expr: |_| ParseCommonLogFn { value: Literal::from("foo").boxed(), timestamp_format: Some(Literal::from(1).boxed()) }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } ]; } diff --git a/lib/remap-functions/src/parse_glog.rs b/lib/remap-functions/src/parse_glog.rs index 
3babc8ba169de..b6dac736577b8 100644 --- a/lib/remap-functions/src/parse_glog.rs +++ b/lib/remap-functions/src/parse_glog.rs @@ -114,21 +114,21 @@ impl Expression for ParseGlogFn { .type_def(state) .into_fallible(true) .with_constraint(value::Kind::Map) - .with_inner_type(inner_type_def()) + .with_inner_type(Some(inner_type_def())) } } -fn inner_type_def() -> Option { +fn inner_type_def() -> InnerTypeDef { use value::Kind; - Some(inner_type_def!({ + inner_type_def!({ "level": Kind::Bytes, "timestamp": Kind::Timestamp, "id": Kind::Integer, "file": Kind::Bytes, "line": Kind::Integer, "message": Kind::Bytes, - })) + }) } #[cfg(test)] @@ -188,17 +188,17 @@ mod tests { test_type_def![ value_string { expr: |_| ParseGlogFn { value: Literal::from("foo").boxed() }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } value_non_string { expr: |_| ParseGlogFn { value: Literal::from(1).boxed() }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } value_optional { expr: |_| ParseGlogFn { value: Box::new(Noop) }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } ]; } diff --git a/lib/remap-functions/src/parse_syslog.rs b/lib/remap-functions/src/parse_syslog.rs index 91f64e0c6b32a..5d4b4cfadf042 100644 --- a/lib/remap-functions/src/parse_syslog.rs +++ b/lib/remap-functions/src/parse_syslog.rs @@ -115,12 +115,12 @@ impl Expression for ParseSyslogFn { .type_def(state) .into_fallible(true) .with_constraint(Kind::Map) - .with_inner_type(inner_type_def()) + .with_inner_type(Some(inner_type_def())) } } -fn inner_type_def() -> Option { - Some(inner_type_def! 
({ +fn inner_type_def() -> InnerTypeDef { + inner_type_def! ({ "message": Kind::Bytes, "hostname": Kind::Bytes | Kind::Null, "severity": Kind::Bytes | Kind::Null, @@ -129,7 +129,7 @@ fn inner_type_def() -> Option { "msgid": Kind::Bytes | Kind::Null, "timestamp": Kind::Timestamp | Kind::Null, "procid": Kind::Bytes | Kind::Integer | Kind::Null - })) + }) } #[cfg(test)] @@ -143,7 +143,7 @@ mod tests { expr: |_| ParseSyslogFn { value: Literal::from("foo").boxed() }, def: TypeDef { kind: Kind::Map, fallible: true, - inner_type_def: inner_type_def(), + inner_type_def: Some(inner_type_def()), }, } @@ -151,7 +151,7 @@ mod tests { expr: |_| ParseSyslogFn { value: Literal::from(1).boxed() }, def: TypeDef { fallible: true, kind: Kind::Map, - inner_type_def: inner_type_def(), + inner_type_def: Some(inner_type_def()), }, } @@ -159,7 +159,7 @@ mod tests { expr: |_| ParseSyslogFn { value: Box::new(Noop) }, def: TypeDef { fallible: true, kind: Kind::Map, - inner_type_def: inner_type_def(), + inner_type_def: Some(inner_type_def()), }, } ]; diff --git a/lib/remap-functions/src/parse_url.rs b/lib/remap-functions/src/parse_url.rs index 44e718bacc4b2..484b954521ebe 100644 --- a/lib/remap-functions/src/parse_url.rs +++ b/lib/remap-functions/src/parse_url.rs @@ -53,14 +53,14 @@ impl Expression for ParseUrlFn { self.value .type_def(state) .into_fallible(true) // URL parsing error - .with_inner_type(inner_type_def()) + .with_inner_type(Some(inner_type_def())) .with_constraint(value::Kind::Map) } } /// The type defs of the fields contained by the returned map. -fn inner_type_def() -> Option { - Some(inner_type_def! ({ +fn inner_type_def() -> InnerTypeDef { + inner_type_def! 
({ "scheme": Kind::Bytes, "username": Kind::Bytes, "password": Kind::Bytes, @@ -69,7 +69,7 @@ fn inner_type_def() -> Option { "port": Kind::Bytes, "fragment": Kind::Bytes | Kind::Null, "query": Kind::Map, - })) + }) } fn url_to_value(url: Url) -> Value { @@ -108,12 +108,12 @@ mod tests { remap::test_type_def![ value_string { expr: |_| ParseUrlFn { value: Literal::from("foo").boxed() }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } value_optional { expr: |_| ParseUrlFn { value: Box::new(Noop) }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: inner_type_def() }, + def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, } ]; From 300b565bb1ebce3e17ab84b6da071c30ca428a18 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 16:45:14 -0400 Subject: [PATCH 10/14] More unnecessary_wraps, but these ones I like Signed-off-by: Jesse Szwedko --- lib/remap-cli/src/cmd.rs | 13 ++++++------- lib/remap-cli/src/lib.rs | 1 - lib/remap-cli/src/repl.rs | 5 +---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/lib/remap-cli/src/cmd.rs b/lib/remap-cli/src/cmd.rs index 0acc8c6541d54..d4dfcab89a5fc 100644 --- a/lib/remap-cli/src/cmd.rs +++ b/lib/remap-cli/src/cmd.rs @@ -73,14 +73,13 @@ fn run(opts: &Opts) -> Result<(), Error> { } } -#[cfg(feature = "repl")] fn repl(objects: Vec) -> Result<(), Error> { - repl::run(objects) -} - -#[cfg(not(feature = "repl"))] -fn repl(_: Vec) -> Result<(), Error> { - Err(Error::ReplFeature) + if cfg!(feature = "repl") { + repl::run(objects); + Ok(()) + } else { + Err(Error::ReplFeature) + } } fn execute(object: &mut impl Object, source: String) -> Result { diff --git a/lib/remap-cli/src/lib.rs b/lib/remap-cli/src/lib.rs index 1ad764a42af61..b824333b3c21c 100644 --- a/lib/remap-cli/src/lib.rs +++ 
b/lib/remap-cli/src/lib.rs @@ -18,7 +18,6 @@ pub enum Error { #[error("json error")] Json(#[from] serde_json::Error), - #[cfg(not(feature = "repl"))] #[error("repl feature disabled, program input required")] ReplFeature, } diff --git a/lib/remap-cli/src/repl.rs b/lib/remap-cli/src/repl.rs index 9430f6d1e1675..4c04e0fae0f28 100644 --- a/lib/remap-cli/src/repl.rs +++ b/lib/remap-cli/src/repl.rs @@ -1,4 +1,3 @@ -use crate::Error; use prettytable::{format, Cell, Row, Table}; use regex::Regex; use remap::{state, Formatter, Object, Program, Runtime, Value}; @@ -24,7 +23,7 @@ VRL REPL commands: const DOCS_URL: &str = "https://vector.dev/docs/reference/vrl"; const FUNCTIONS_ROOT_URL: &str = "https://vector.dev/docs/reference/vrl/functions"; -pub(crate) fn run(mut objects: Vec) -> Result<(), Error> { +pub(crate) fn run(mut objects: Vec) { let mut index = 0; let func_docs_regex = Regex::new(r"^help\sdocs\s(\w{1,})$").unwrap(); @@ -131,8 +130,6 @@ pub(crate) fn run(mut objects: Vec) -> Result<(), Error> { } } } - - Ok(()) } fn resolve( From 653f30f2ef1cbf3e918ec6a2da9416ccc651b590 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 Feb 2021 16:47:51 -0400 Subject: [PATCH 11/14] more find().is_some() -> contains() Signed-off-by: Jesse Szwedko --- lib/shared/src/conversion.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/shared/src/conversion.rs b/lib/shared/src/conversion.rs index f97c5c4bb74a2..f8475be2b141f 100644 --- a/lib/shared/src/conversion.rs +++ b/lib/shared/src/conversion.rs @@ -184,11 +184,11 @@ fn parse_bool(s: &str) -> Result { /// Does the format specifier have a time zone option? 
fn format_has_zone(fmt: &str) -> bool { - fmt.find("%Z").is_some() - || fmt.find("%z").is_some() - || fmt.find("%:z").is_some() - || fmt.find("%#z").is_some() - || fmt.find("%+").is_some() + fmt.contains("%Z") + || fmt.contains("%z") + || fmt.contains("%:z") + || fmt.contains("%#z") + || fmt.contains("%+") } /// Convert a timestamp with a non-UTC time zone into UTC From 148332e4b97e791396b2495f3fd76c3ad69915f0 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Wed, 17 Feb 2021 11:09:35 -0400 Subject: [PATCH 12/14] More unnecessary_wraps Signed-off-by: Jesse Szwedko --- lib/vrl/parser/src/lex.rs | 83 ++++++++++---------- lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs | 1 + src/sources/prometheus/parser.rs | 8 +- 3 files changed, 48 insertions(+), 44 deletions(-) diff --git a/lib/vrl/parser/src/lex.rs b/lib/vrl/parser/src/lex.rs index 5c97a7c676e43..80ea444c0f37a 100644 --- a/lib/vrl/parser/src/lex.rs +++ b/lib/vrl/parser/src/lex.rs @@ -5,7 +5,8 @@ use std::iter::Peekable; use std::str::CharIndices; pub type Tok<'input> = Token<&'input str>; -pub type Spanned<'input, Loc> = Result<(Loc, Tok<'input>, Loc), Error>; +pub type SpannedResult<'input, Loc> = Result, Error>; +pub type Spanned<'input, Loc> = (Loc, Tok<'input>, Loc); #[derive(thiserror::Error, Clone, Debug, PartialEq)] pub enum Error { @@ -447,7 +448,7 @@ impl StringLiteral<&str> { // ----------------------------------------------------------------------------- impl<'input> Iterator for Lexer<'input> { - type Item = Spanned<'input, usize>; + type Item = SpannedResult<'input, usize>; fn next(&mut self) -> Option { use Token::*; @@ -461,7 +462,7 @@ impl<'input> Iterator for Lexer<'input> { // represent a physical character, instead it is a boundary marker. if self.query_start(start) { // dbg!("LQuery"); // NOTE: uncomment this for debugging - return self.token2(start, start + 1, LQuery); + return Some(Ok(self.token2(start, start + 1, LQuery))); } // Check if we need to emit a `RQuery` token. 
@@ -470,7 +471,7 @@ impl<'input> Iterator for Lexer<'input> { // represent a physical character, instead it is a boundary marker. if let Some(pos) = self.query_end(start) { // dbg!("RQuery"); // NOTE: uncomment this for debugging - return self.token2(pos, pos + 1, RQuery); + return Some(Ok(self.token2(pos, pos + 1, RQuery))); } // Advance the internal iterator and emit the next token, or loop @@ -479,26 +480,28 @@ impl<'input> Iterator for Lexer<'input> { let result = match ch { '"' => Some(self.string_literal(start)), - ';' => self.token(start, SemiColon), - '\n' => self.token(start, Newline), - '\\' => self.token(start, Escape), + ';' => Some(Ok(self.token(start, SemiColon))), + '\n' => Some(Ok(self.token(start, Newline))), + '\\' => Some(Ok(self.token(start, Escape))), - '(' => self.open(start, LParen), - '[' => self.open(start, LBracket), - '{' => self.open(start, LBrace), - '}' => self.close(start, RBrace), - ']' => self.close(start, RBracket), - ')' => self.close(start, RParen), + '(' => Some(Ok(self.open(start, LParen))), + '[' => Some(Ok(self.open(start, LBracket))), + '{' => Some(Ok(self.open(start, LBrace))), + '}' => Some(Ok(self.close(start, RBrace))), + ']' => Some(Ok(self.close(start, RBracket))), + ')' => Some(Ok(self.close(start, RParen))), - '.' => self.token(start, Dot), - ':' => self.token(start, Colon), - ',' => self.token(start, Comma), + '.' => Some(Ok(self.token(start, Dot))), + ':' => Some(Ok(self.token(start, Colon))), + ',' => Some(Ok(self.token(start, Comma))), - '_' if self.test_peek(char::is_alphabetic) => Some(self.internal_test(start)), - '_' => self.token(start, Underscore), + '_' if self.test_peek(char::is_alphabetic) => { + Some(Ok(self.internal_test(start))) + } + '_' => Some(Ok(self.token(start, Underscore))), '!' if self.test_peek(|ch| ch == '!' 
|| !is_operator(ch)) => { - self.token(start, Bang) + Some(Ok(self.token(start, Bang))) } '#' => { @@ -510,14 +513,14 @@ impl<'input> Iterator for Lexer<'input> { 's' if self.test_peek(|ch| ch == '\'') => Some(self.raw_string_literal(start)), 't' if self.test_peek(|ch| ch == '\'') => Some(self.timestamp_literal(start)), - ch if is_ident_start(ch) => Some(self.identifier_or_function_call(start)), + ch if is_ident_start(ch) => Some(Ok(self.identifier_or_function_call(start))), ch if is_digit(ch) || (ch == '-' && self.test_peek(is_digit)) => { Some(self.numeric_literal(start)) } - ch if is_operator(ch) => Some(self.operator(start)), + ch if is_operator(ch) => Some(Ok(self.operator(start))), ch if ch.is_whitespace() => continue, - ch => self.token(start, InvalidToken(ch)), + ch => Some(Ok(self.token(start, InvalidToken(ch)))), }; // dbg!(&result); // NOTE: uncomment this for debugging @@ -529,7 +532,7 @@ impl<'input> Iterator for Lexer<'input> { // queries. } else if let Some(end) = self.rquery_indices.pop() { // dbg!("RQuery"); // NOTE: uncomment this for debugging - return self.token2(end, end + 1, RQuery); + return Some(Ok(self.token2(end, end + 1, RQuery))); } return None; @@ -542,7 +545,7 @@ impl<'input> Iterator for Lexer<'input> { // ----------------------------------------------------------------------------- impl<'input> Lexer<'input> { - fn open(&mut self, start: usize, token: Token<&'input str>) -> Option> { + fn open(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> { match &token { Token::LParen => self.open_parens += 1, Token::LBracket => self.open_brackets += 1, @@ -553,7 +556,7 @@ impl<'input> Lexer<'input> { self.token(start, token) } - fn close(&mut self, start: usize, token: Token<&'input str>) -> Option> { + fn close(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> { match &token { Token::RParen => self.open_parens = self.open_parens.saturating_sub(1), Token::RBracket => self.open_brackets = 
self.open_brackets.saturating_sub(1), @@ -564,7 +567,7 @@ impl<'input> Lexer<'input> { self.token(start, token) } - fn token(&mut self, start: usize, token: Token<&'input str>) -> Option> { + fn token(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> { let end = self.next_index(); self.token2(start, end, token) } @@ -574,8 +577,8 @@ impl<'input> Lexer<'input> { start: usize, end: usize, token: Token<&'input str>, - ) -> Option> { - Some(Ok((start, token, end))) + ) -> Spanned<'input, usize> { + (start, token, end) } fn query_end(&mut self, start: usize) -> Option { @@ -640,7 +643,7 @@ impl<'input> Lexer<'input> { let mut end = 0; while let Some((pos, ch)) = chars.next() { let take_until_end = - |result: Spanned<'input, usize>, + |result: SpannedResult<'input, usize>, last_char: &mut Option, end: &mut usize, chars: &mut Peekable>| { @@ -735,7 +738,7 @@ impl<'input> Lexer<'input> { while let Some((pos, ch)) = chars.peek() { let pos = *pos; - let literal_check = |result: Spanned<'input, usize>, chars: &mut Peekable>| match result { + let literal_check = |result: SpannedResult<'input, usize>, chars: &mut Peekable>| match result { Err(_) => Err(()), Ok((_, _, new)) => { #[allow(clippy::while_let_on_iterator)] @@ -854,7 +857,7 @@ impl<'input> Lexer<'input> { true } - fn string_literal(&mut self, start: usize) -> Spanned<'input, usize> { + fn string_literal(&mut self, start: usize) -> SpannedResult<'input, usize> { let content_start = self.next_index(); loop { @@ -876,19 +879,19 @@ impl<'input> Lexer<'input> { Err(Error::StringLiteral { start }) } - fn regex_literal(&mut self, start: usize) -> Spanned<'input, usize> { + fn regex_literal(&mut self, start: usize) -> SpannedResult<'input, usize> { self.quoted_literal(start, Token::RegexLiteral) } - fn raw_string_literal(&mut self, start: usize) -> Spanned<'input, usize> { + fn raw_string_literal(&mut self, start: usize) -> SpannedResult<'input, usize> { self.quoted_literal(start, |c| 
Token::StringLiteral(StringLiteral::Raw(c))) } - fn timestamp_literal(&mut self, start: usize) -> Spanned<'input, usize> { + fn timestamp_literal(&mut self, start: usize) -> SpannedResult<'input, usize> { self.quoted_literal(start, Token::TimestampLiteral) } - fn numeric_literal(&mut self, start: usize) -> Spanned<'input, usize> { + fn numeric_literal(&mut self, start: usize) -> SpannedResult<'input, usize> { let (end, int) = self.take_while(start, |ch| is_digit(ch) || ch == '_'); match self.peek() { @@ -928,7 +931,7 @@ impl<'input> Lexer<'input> { Token::ident(ident) }; - Ok((start, token, end)) + (start, token, end) } fn operator(&mut self, start: usize) -> Spanned<'input, usize> { @@ -941,21 +944,21 @@ impl<'input> Lexer<'input> { op => Token::Operator(op), }; - Ok((start, token, end)) + (start, token, end) } fn internal_test(&mut self, start: usize) -> Spanned<'input, usize> { self.bump(); let (end, test) = self.take_while(start, char::is_alphabetic); - Ok((start, Token::InternalTest(test), end)) + (start, Token::InternalTest(test), end) } fn quoted_literal( &mut self, start: usize, tok: impl Fn(&'input str) -> Tok<'input>, - ) -> Spanned<'input, usize> { + ) -> SpannedResult<'input, usize> { self.bump(); let content_start = self.next_index(); @@ -1122,7 +1125,7 @@ mod test { use super::*; use crate::lex::Token::*; - fn lexer(input: &str) -> impl Iterator> + '_ { + fn lexer(input: &str) -> impl Iterator> + '_ { let mut lexer = Lexer::new(input); Box::new(std::iter::from_fn(move || Some(lexer.next()?))) } diff --git a/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs b/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs index 8b9c9ba0aad54..24d33164fee1f 100644 --- a/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs +++ b/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs @@ -130,6 +130,7 @@ impl Expression for ParseAwsVpcFlowLogFn { type ParseResult = std::result::Result; +#[allow(clippy::unnecessary_wraps)] // match other parse methods fn identity<'a>(_key: &'a str, value: &'a str) 
-> ParseResult<&'a str> { Ok(value) } diff --git a/src/sources/prometheus/parser.rs b/src/sources/prometheus/parser.rs index 9d9696196a09f..b6c5786bf8a79 100644 --- a/src/sources/prometheus/parser.rs +++ b/src/sources/prometheus/parser.rs @@ -23,14 +23,14 @@ fn utc_timestamp(timestamp: Option) -> Option> { } pub(super) fn parse_text(packet: &str) -> Result, ParserError> { - reparse_groups(prometheus_parser::parse_text(packet)?) + prometheus_parser::parse_text(packet).map(reparse_groups) } pub(super) fn parse_request(request: proto::WriteRequest) -> Result, ParserError> { - reparse_groups(prometheus_parser::parse_request(request)?) + prometheus_parser::parse_request(request).map(reparse_groups) } -fn reparse_groups(groups: Vec) -> Result, ParserError> { +fn reparse_groups(groups: Vec) -> Vec { let mut result = Vec::new(); for group in groups { @@ -128,7 +128,7 @@ fn reparse_groups(groups: Vec) -> Result, ParserError> { } } - Ok(result) + result } #[cfg(test)] From de4eaee2261fc867f291225917d1d561d30b0e97 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Wed, 17 Feb 2021 13:22:23 -0400 Subject: [PATCH 13/14] clippy Signed-off-by: Jesse Szwedko --- lib/vrl/cli/src/repl.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/vrl/cli/src/repl.rs b/lib/vrl/cli/src/repl.rs index d341061d89b8c..191b52915baf7 100644 --- a/lib/vrl/cli/src/repl.rs +++ b/lib/vrl/cli/src/repl.rs @@ -1,4 +1,3 @@ -use crate::Error; use indoc::indoc; use prettytable::{format, Cell, Row, Table}; use regex::Regex; From f279014f63a058abe29e0afb8dbc1f1dffc2fb32 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Wed, 17 Feb 2021 19:21:24 -0400 Subject: [PATCH 14/14] Delete remap files from bad merge Signed-off-by: Jesse Szwedko --- lib/remap-functions/src/parse_aws_alb_log.rs | 307 ---- ...aws_cloudwatch_log_subscription_message.rs | 156 -- .../src/parse_aws_vpc_flow_log.rs | 260 --- lib/remap-functions/src/parse_syslog.rs | 265 ---- lib/remap-functions/src/parse_url.rs | 173 -- 
lib/remap-functions/src/to_int.rs | 170 -- lib/remap-functions/src/to_timestamp.rs | 162 -- lib/remap-lang/src/expression/assignment.rs | 325 ---- lib/remap-lang/src/parser.rs | 1389 ----------------- 9 files changed, 3207 deletions(-) delete mode 100644 lib/remap-functions/src/parse_aws_alb_log.rs delete mode 100644 lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs delete mode 100644 lib/remap-functions/src/parse_aws_vpc_flow_log.rs delete mode 100644 lib/remap-functions/src/parse_syslog.rs delete mode 100644 lib/remap-functions/src/parse_url.rs delete mode 100644 lib/remap-functions/src/to_int.rs delete mode 100644 lib/remap-functions/src/to_timestamp.rs delete mode 100644 lib/remap-lang/src/expression/assignment.rs delete mode 100644 lib/remap-lang/src/parser.rs diff --git a/lib/remap-functions/src/parse_aws_alb_log.rs b/lib/remap-functions/src/parse_aws_alb_log.rs deleted file mode 100644 index 57570c0168a74..0000000000000 --- a/lib/remap-functions/src/parse_aws_alb_log.rs +++ /dev/null @@ -1,307 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, take_while1}, - character::complete::char, - combinator::map_res, - sequence::{delimited, preceded}, - IResult, -}; -use remap::prelude::*; -use std::collections::BTreeMap; -use value::Kind; - -#[derive(Clone, Copy, Debug)] -pub struct ParseAwsAlbLog; - -impl Function for ParseAwsAlbLog { - fn identifier(&self) -> &'static str { - "parse_aws_alb_log" - } - - fn parameters(&self) -> &'static [Parameter] { - &[Parameter { - keyword: "value", - accepts: |v| matches!(v, Value::Bytes(_)), - required: true, - }] - } - - fn compile(&self, mut arguments: ArgumentList) -> Result> { - let value = arguments.required("value")?.boxed(); - - Ok(Box::new(ParseAwsAlbLogFn::new(value))) - } -} - -#[derive(Debug, Clone)] -struct ParseAwsAlbLogFn { - value: Box, -} - -impl ParseAwsAlbLogFn { - fn new(value: Box) -> Self { - Self { value } - } -} - -impl Expression for ParseAwsAlbLogFn { - fn execute(&self, 
state: &mut state::Program, object: &mut dyn Object) -> Result { - let bytes = self.value.execute(state, object)?.try_bytes()?; - - parse_log(&String::from_utf8_lossy(&bytes)) - } - - fn type_def(&self, state: &state::Compiler) -> TypeDef { - self.value - .type_def(state) - .into_fallible(true) // Log parsing error - .with_inner_type(Some(inner_type_def())) - .with_constraint(value::Kind::Map) - } -} - -/// The type defs of the fields contained by the returned map. -fn inner_type_def() -> InnerTypeDef { - inner_type_def! ({ - "type": Kind::Bytes, - "timestamp": Kind::Bytes, - "elb": Kind::Bytes, - "client_host": Kind::Bytes, - "target_host": Kind::Bytes, - "request_processing_time": Kind::Float, - "target_processing_time": Kind::Float, - "response_processing_time": Kind::Float, - "elb_status_code": Kind::Bytes, - "target_status_code": Kind::Bytes, - "received_bytes": Kind::Integer, - "sent_bytes": Kind::Integer, - "request_method": Kind::Bytes, - "request_protocol": Kind::Bytes, - "request_url": Kind::Bytes, - "user_agent": Kind::Bytes, - "ssl_cipher": Kind::Bytes, - "ssl_protocol": Kind::Bytes, - "target_group_arn": Kind::Bytes, - "trace_id": Kind::Bytes, - "domain_name": Kind::Bytes, - "chosen_cert_arn": Kind::Bytes, - "matched_rule_priority": Kind::Bytes, - "request_creation_time": Kind::Bytes, - "actions_executed": Kind::Bytes, - "redirect_url": Kind::Bytes, - "error_reason": Kind::Bytes, - "target_port_list": Kind::Bytes, - "target_status_code_list": Kind::Bytes, - "classification": Kind::Bytes, - "classification_reason": Kind::Bytes - }) -} - -fn parse_log(mut input: &str) -> Result { - let mut log = BTreeMap::new(); - - macro_rules! get_value { - ($name:expr, $parser:expr) => {{ - let result: IResult<&str, _, (&str, nom::error::ErrorKind)> = $parser(input); - match result { - Ok((rest, value)) => { - input = rest; - value - } - Err(error) => { - return Err(format!("failed to get field `{}`: {}", $name, error).into()) - } - } - }}; - } - macro_rules! 
field_raw { - ($name:expr, $parser:expr) => { - log.insert( - $name.into(), - match get_value!($name, $parser).into() { - Value::Bytes(bytes) if bytes == &"-" => Value::Null, - value => value, - }, - ) - }; - } - macro_rules! field { - ($name:expr, $($pattern:pat)|+) => { - field_raw!($name, preceded(char(' '), take_while1(|c| matches!(c, $($pattern)|+)))) - }; - } - macro_rules! field_parse { - ($name:expr, $($pattern:pat)|+, $type:ty) => { - field_raw!($name, map_res(preceded(char(' '), take_while1(|c| matches!(c, $($pattern)|+))), |s: &str| s.parse::<$type>())) - }; - } - - field_raw!("type", take_while1(|c| matches!(c, 'a'..='z' | '0'..='9'))); - field!("timestamp", '0'..='9' | '.' | '-' | ':' | 'T' | 'Z'); - field_raw!("elb", take_anything); - field!("client_host", '0'..='9' | '.' | ':' | '-'); - field!("target_host", '0'..='9' | '.' | ':' | '-'); - field_parse!("request_processing_time", '0'..='9' | '.' | '-', f64); - field_parse!("target_processing_time", '0'..='9' | '.' | '-', f64); - field_parse!("response_processing_time", '0'..='9' | '.' 
| '-', f64); - field!("elb_status_code", '0'..='9' | '-'); - field!("target_status_code", '0'..='9' | '-'); - field_parse!("received_bytes", '0'..='9' | '-', i64); - field_parse!("sent_bytes", '0'..='9' | '-', i64); - let request = get_value!("request", take_quoted1); - let mut iter = request.splitn(2, ' '); - log.insert("request_method".to_owned(), iter.next().unwrap().into()); // split always have at least 1 item - match iter.next() { - Some(value) => { - let mut iter = value.rsplitn(2, ' '); - log.insert("request_protocol".into(), iter.next().unwrap().into()); // same as previous one - match iter.next() { - Some(value) => log.insert("request_url".into(), value.into()), - None => return Err("failed to get field `request_url`".into()), - } - } - None => return Err("failed to get field `request_url`".into()), - }; - field_raw!("user_agent", take_quoted1); - field_raw!("ssl_cipher", take_anything); - field_raw!("ssl_protocol", take_anything); - field_raw!("target_group_arn", take_anything); - field_raw!("trace_id", take_quoted1); - field_raw!("domain_name", take_quoted1); - field_raw!("chosen_cert_arn", take_quoted1); - field!("matched_rule_priority", '0'..='9' | '-'); - field!( - "request_creation_time", - '0'..='9' | '.' | '-' | ':' | 'T' | 'Z' - ); - field_raw!("actions_executed", take_quoted1); - field_raw!("redirect_url", take_quoted1); - field_raw!("error_reason", take_quoted1); - field_raw!( - "target_port_list", - take_list(|c| matches!(c, '0'..='9' | '.' 
| ':' | '-')) - ); - field_raw!( - "target_status_code_list", - take_list(|c| matches!(c, '0'..='9')) - ); - field_raw!("classification", take_quoted1); - field_raw!("classification_reason", take_quoted1); - - match input.is_empty() { - true => Ok(log.into()), - false => Err(format!(r#"Log should be fully consumed: "{}""#, input).into()), - } -} - -type SResult<'a, O> = IResult<&'a str, O, (&'a str, nom::error::ErrorKind)>; - -fn take_anything(input: &str) -> SResult<&str> { - preceded(char(' '), take_while1(|c| c != ' '))(input) -} - -fn take_quoted1(input: &str) -> SResult { - delimited(tag(" \""), until_quote, char('"'))(input) -} - -fn until_quote(input: &str) -> SResult { - let mut ret = String::new(); - let mut skip_delimiter = false; - for (i, ch) in input.char_indices() { - if ch == '\\' && !skip_delimiter { - skip_delimiter = true; - } else if ch == '"' && !skip_delimiter { - return Ok((&input[i..], ret)); - } else { - ret.push(ch); - skip_delimiter = false; - } - } - Err(nom::Err::Incomplete(nom::Needed::Unknown)) -} - -fn take_list(cond: impl Fn(char) -> bool) -> impl FnOnce(&str) -> SResult> { - move |input: &str| { - alt(( - map_res(tag(r#" "-""#), |_| { - Ok::<_, std::convert::Infallible>(vec![]) - }), - map_res(preceded(char(' '), take_while1(cond)), |v: &str| { - Ok::<_, std::convert::Infallible>(vec![v]) - }), - ))(input) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - remap::test_type_def![ - value_string { - expr: |_| ParseAwsAlbLogFn { value: Literal::from("foo").boxed() }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - - value_optional { - expr: |_| ParseAwsAlbLogFn { value: Box::new(Noop) }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - ]; - - #[test] - fn parse_aws_alb_log() { - let logs = vec![ - r#"http 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 -192.168.131.39:2817 10.0.0.1:80 0.000 0.001 0.000 
200 200 34 366 -"GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - - -arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 -"Root=1-58337262-36d228ad5d99923122bbe354" "-" "-" -0 2018-07-02T22:22:48.364000Z "forward" "-" "-" 10.0.0.1:80 200 "-" "-""#, - r#"https 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 -192.168.131.39:2817 10.0.0.1:80 0.086 0.048 0.037 200 200 0 57 -"GET https://www.example.com:443/ HTTP/1.1" "curl/7.46.0" ECDHE-RSA-AES128-GCM-SHA256 TLSv1.2 -arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 -"Root=1-58337281-1d84f3d73c47ec4e58577259" "www.example.com" "arn:aws:acm:us-east-2:123456789012:certificate/12345678-1234-1234-1234-123456789012" -1 2018-07-02T22:22:48.364000Z "authenticate,forward" "-" "-" 10.0.0.1:80 200 "-" "-""#, - r#"h2 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 -10.0.1.252:48160 10.0.0.66:9000 0.000 0.002 0.000 200 200 5 257 -"GET https://10.0.2.105:773/ HTTP/2.0" "curl/7.46.0" ECDHE-RSA-AES128-GCM-SHA256 TLSv1.2 -arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 -"Root=1-58337327-72bd00b0343d75b906739c42" "-" "-" -1 2018-07-02T22:22:48.364000Z "redirect" "https://example.com:80/" "-" 10.0.0.66:9000 200 "-" "-""#, - r#"ws 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 -10.0.0.140:40914 10.0.1.192:8010 0.001 0.003 0.000 101 101 218 587 -"GET http://10.0.0.30:80/ HTTP/1.1" "-" - - -arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 -"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-" -1 2018-07-02T22:22:48.364000Z "forward" "-" "-" 10.0.1.192:8010 101 "-" "-""#, - r#"wss 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 -10.0.0.140:44244 10.0.0.171:8010 0.000 0.001 0.000 101 101 218 786 -"GET https://10.0.0.30:443/ HTTP/1.1" "-" ECDHE-RSA-AES128-GCM-SHA256 TLSv1.2 
-arn:aws:elasticloadbalancing:us-west-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 -"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-" -1 2018-07-02T22:22:48.364000Z "forward" "-" "-" 10.0.0.171:8010 101 "-" "-""#, - r#"http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 -192.168.131.39:2817 - 0.000 0.001 0.000 200 200 34 366 -"GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - - -arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 -"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-" -0 2018-11-30T22:22:48.364000Z "forward" "-" "-" "-" "-" "-" "-""#, - r#"http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 -192.168.131.39:2817 - 0.000 0.001 0.000 502 - 34 366 -"GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - - -arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 -"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-" -0 2018-11-30T22:22:48.364000Z "forward" "-" "LambdaInvalidResponse" "-" "-" "-" "-""#, - ]; - let logs = logs - .into_iter() - .map(|s| s.replace('\n', " ")) - .collect::>(); - - for log in logs { - assert!(parse_log(&log).is_ok()) - } - } -} diff --git a/lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs b/lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs deleted file mode 100644 index fd889b9c8cced..0000000000000 --- a/lib/remap-functions/src/parse_aws_cloudwatch_log_subscription_message.rs +++ /dev/null @@ -1,156 +0,0 @@ -use remap::prelude::*; -use shared::{aws_cloudwatch_logs_subscription::AwsCloudWatchLogsSubscriptionMessage, btreemap}; -use value::Kind; - -#[derive(Clone, Copy, Debug)] -pub struct ParseAwsCloudWatchLogSubscriptionMessage; - -impl Function for ParseAwsCloudWatchLogSubscriptionMessage { - fn identifier(&self) -> &'static str { - "parse_aws_cloudwatch_log_subscription_message" - } - - fn parameters(&self) -> &'static [Parameter] { - &[Parameter { - 
keyword: "value", - accepts: |v| matches!(v, Value::Bytes(_)), - required: true, - }] - } - - fn compile(&self, mut arguments: ArgumentList) -> Result> { - let value = arguments.required("value")?.boxed(); - - Ok(Box::new(ParseAwsCloudWatchLogSubscriptionMessageFn { - value, - })) - } -} - -#[derive(Debug, Clone)] -struct ParseAwsCloudWatchLogSubscriptionMessageFn { - value: Box, -} - -impl Expression for ParseAwsCloudWatchLogSubscriptionMessageFn { - fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { - let bytes = self.value.execute(state, object)?.try_bytes()?; - - let message = serde_json::from_slice::(&bytes) - .map_err(|e| format!("unable to parse: {}", e))?; - - Ok(btreemap! { - "owner" => message.owner, - "message_type" => message.message_type.as_str(), - "log_group" => message.log_group, - "log_stream" => message.log_stream, - "subscription_filters" => message.subscription_filters, - "log_events" => message.log_events.into_iter().map(|event| btreemap![ - "id" => event.id, - "timestamp" => event.timestamp, - "message" => event.message, - ]).collect::>(), - } - .into()) - } - - fn type_def(&self, state: &state::Compiler) -> TypeDef { - self.value - .type_def(state) - .into_fallible(true) // Message parsing error - .with_inner_type(Some(inner_type_def())) - .with_constraint(value::Kind::Map) - } -} - -/// The type defs of the fields contained by the returned map. -fn inner_type_def() -> InnerTypeDef { - inner_type_def! ({ - "owner": Kind::Bytes, - "message_type": Kind::Bytes, - "log_group": Kind::Bytes, - "log_stream": Kind::Bytes, - "subscription_filters": TypeDef::from(Kind::Array) - .with_inner_type(Some(inner_type_def!([ Kind::Bytes ]))), - "log_events": TypeDef::from(Kind::Array) - .with_inner_type(Some(inner_type_def! 
({ - "id": Kind::Bytes, - "timestamp": Kind::Timestamp, - "message": Kind::Bytes, - }))) - }) -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::{TimeZone, Utc}; - use value::Kind; - - test_function![ - parse_aws_cloudwatch_log_subscription_message => ParseAwsCloudWatchLogSubscriptionMessage; - - invalid_type { - args: func_args![value: "42"], - want: Err("function call error: unable to parse: invalid type: integer `42`, expected struct AwsCloudWatchLogsSubscriptionMessage at line 1 column 2"), - } - - string { - args: func_args![value: r#" -{ - "messageType": "DATA_MESSAGE", - "owner": "071959437513", - "logGroup": "/jesse/test", - "logStream": "test", - "subscriptionFilters": [ - "Destination" - ], - "logEvents": [ - { - "id": "35683658089614582423604394983260738922885519999578275840", - "timestamp": 1600110569039, - "message": "{\"bytes\":26780,\"datetime\":\"14/Sep/2020:11:45:41 -0400\",\"host\":\"157.130.216.193\",\"method\":\"PUT\",\"protocol\":\"HTTP/1.0\",\"referer\":\"https://www.principalcross-platform.io/markets/ubiquitous\",\"request\":\"/expedite/convergence\",\"source_type\":\"stdin\",\"status\":301,\"user-identifier\":\"-\"}" - }, - { - "id": "35683658089659183914001456229543810359430816722590236673", - "timestamp": 1600110569041, - "message": "{\"bytes\":17707,\"datetime\":\"14/Sep/2020:11:45:41 -0400\",\"host\":\"109.81.244.252\",\"method\":\"GET\",\"protocol\":\"HTTP/2.0\",\"referer\":\"http://www.investormission-critical.io/24/7/vortals\",\"request\":\"/scale/functionalities/optimize\",\"source_type\":\"stdin\",\"status\":502,\"user-identifier\":\"feeney1708\"}" - } - ] -} -"#], - want: Ok(btreemap! { - "owner" => "071959437513", - "message_type" => "DATA_MESSAGE", - "log_group" => "/jesse/test", - "log_stream" => "test", - "subscription_filters" => vec!["Destination"], - "log_events" => vec![btreemap! 
{ - "id" => "35683658089614582423604394983260738922885519999578275840", - "timestamp" => Utc.timestamp(1600110569, 39000000), - "message" => "{\"bytes\":26780,\"datetime\":\"14/Sep/2020:11:45:41 -0400\",\"host\":\"157.130.216.193\",\"method\":\"PUT\",\"protocol\":\"HTTP/1.0\",\"referer\":\"https://www.principalcross-platform.io/markets/ubiquitous\",\"request\":\"/expedite/convergence\",\"source_type\":\"stdin\",\"status\":301,\"user-identifier\":\"-\"}", - }, btreemap! { - "id" => "35683658089659183914001456229543810359430816722590236673", - "timestamp" => Utc.timestamp(1600110569, 41000000), - "message" => "{\"bytes\":17707,\"datetime\":\"14/Sep/2020:11:45:41 -0400\",\"host\":\"109.81.244.252\",\"method\":\"GET\",\"protocol\":\"HTTP/2.0\",\"referer\":\"http://www.investormission-critical.io/24/7/vortals\",\"request\":\"/scale/functionalities/optimize\",\"source_type\":\"stdin\",\"status\":502,\"user-identifier\":\"feeney1708\"}", - }], - }) - } - - invalid_value { - args: func_args![value: r#"{ INVALID }"#], - want: Err("function call error: unable to parse: key must be a string at line 1 column 3"), - } - ]; - - test_type_def![value_string { - expr: |_| ParseAwsCloudWatchLogSubscriptionMessageFn { - value: Literal::from("foo").boxed(), - }, - def: TypeDef { - fallible: true, - kind: Kind::Map, - inner_type_def: Some(inner_type_def()), - }, - }]; -} diff --git a/lib/remap-functions/src/parse_aws_vpc_flow_log.rs b/lib/remap-functions/src/parse_aws_vpc_flow_log.rs deleted file mode 100644 index 25f38b9a8cde4..0000000000000 --- a/lib/remap-functions/src/parse_aws_vpc_flow_log.rs +++ /dev/null @@ -1,260 +0,0 @@ -use remap::prelude::*; -use std::collections::BTreeMap; -use value::Kind; - -#[derive(Clone, Copy, Debug)] -pub struct ParseAwsVpcFlowLog; - -impl Function for ParseAwsVpcFlowLog { - fn identifier(&self) -> &'static str { - "parse_aws_vpc_flow_log" - } - - fn parameters(&self) -> &'static [Parameter] { - &[ - Parameter { - keyword: "value", - accepts: |v| 
matches!(v, Value::Bytes(_)), - required: true, - }, - Parameter { - keyword: "format", - accepts: |v| matches!(v, Value::Bytes(_)), - required: false, - }, - ] - } - - fn compile(&self, mut arguments: ArgumentList) -> Result> { - let value = arguments.required("value")?.boxed(); - let format = arguments.optional("format").map(Expr::boxed); - - Ok(Box::new(ParseAwsVpcFlowLogFn::new(value, format))) - } -} - -#[derive(Debug, Clone)] -struct ParseAwsVpcFlowLogFn { - value: Box, - format: Option>, -} - -impl ParseAwsVpcFlowLogFn { - fn new(value: Box, format: Option>) -> Self { - Self { value, format } - } -} - -impl Expression for ParseAwsVpcFlowLogFn { - fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { - let bytes = self.value.execute(state, object)?.try_bytes()?; - let input = String::from_utf8_lossy(&bytes); - - match &self.format { - Some(expr) => { - let bytes = expr.execute(state, object)?.try_bytes()?; - parse_log(&input, Some(&String::from_utf8_lossy(&bytes))) - } - None => parse_log(&input, None), - } - .map_err(Into::into) - } - - fn type_def(&self, state: &state::Compiler) -> TypeDef { - self.value - .type_def(state) - .into_fallible(true) // Log parsin_ error - .with_inner_type(Some(inner_type_def())) - .with_constraint(value::Kind::Map) - } -} - -/// The type defs of the fields contained by the returned map. -fn inner_type_def() -> InnerTypeDef { - inner_type_def! 
({ - "version": Kind::Integer | Kind::Null, - "account_id": Kind::Integer | Kind::Null, - "interface_id": Kind::Bytes | Kind::Null, - "srcaddr": Kind::Bytes | Kind::Null, - "dstaddr": Kind::Bytes | Kind::Null, - "srcport": Kind::Integer | Kind::Null, - "dstport": Kind::Integer | Kind::Null, - "protocol": Kind::Integer | Kind::Null, - "packets": Kind::Integer | Kind::Null, - "bytes": Kind::Integer | Kind::Null, - "start": Kind::Integer | Kind::Null, - "end": Kind::Integer | Kind::Null, - "action": Kind::Bytes | Kind::Null, - "log_status": Kind::Bytes | Kind::Null, - "vpc_id": Kind::Bytes | Kind::Null, - "subnet_id": Kind::Bytes | Kind::Null, - "instance_id": Kind::Bytes | Kind::Null, - "tcp_flags": Kind::Integer | Kind::Null, - "type": Kind::Bytes | Kind::Null, - "pkt_srcaddr": Kind::Bytes | Kind::Null, - "pkt_dstaddr": Kind::Bytes | Kind::Null, - "region": Kind::Bytes | Kind::Null, - "az_id": Kind::Bytes | Kind::Null, - "sublocation_type": Kind::Bytes | Kind::Null, - }) -} - -type ParseResult = std::result::Result; - -#[allow(clippy::unnecessary_wraps)] // match other parse methods -fn identity<'a>(_key: &'a str, value: &'a str) -> ParseResult<&'a str> { - Ok(value) -} - -fn parse_i64(key: &str, value: &str) -> ParseResult { - value - .parse() - .map_err(|_| format!("failed to parse value as i64 (key: `{}`): `{}`", key, value)) -} - -macro_rules! 
create_match { - ($log:expr, $key:expr, $value:expr, $($name:expr => $transform:expr),+) => { - match $key { - $($name => { - let value = match $value { - "-" => Value::Null, - value => $transform($name, value)?.into(), - }; - if $log.insert($name.into(), value).is_some() { - return Err(format!("value already exists for key: `{}`", $key)); - } - })+ - key => return Err(format!("unknown key: `{}`", key)) - }; - }; -} - -fn parse_log(input: &str, format: Option<&str>) -> ParseResult { - let mut log = BTreeMap::new(); - - let mut input = input.split(' '); - let mut format = format - .unwrap_or("version account_id interface_id srcaddr dstaddr srcport dstport protocol packets bytes start end action log_status") - .split(' '); - - loop { - return match (format.next(), input.next()) { - (Some(key), Some(value)) => { - create_match!( - log, key, value, - "version" => parse_i64, - "account_id" => parse_i64, - "interface_id" => identity, - "srcaddr" => identity, - "dstaddr" => identity, - "srcport" => parse_i64, - "dstport" => parse_i64, - "protocol" => parse_i64, - "packets" => parse_i64, - "bytes" => parse_i64, - "start" => parse_i64, - "end" => parse_i64, - "action" => identity, - "log_status" => identity, - "vpc_id" => identity, - "subnet_id" => identity, - "instance_id" => identity, - "tcp_flags" => parse_i64, - "type" => identity, - "pkt_srcaddr" => identity, - "pkt_dstaddr" => identity, - "region" => identity, - "az_id" => identity, - "sublocation_type" => identity, - "sublocation_id" => identity - ); - - continue; - } - (None, Some(value)) => Err(format!("no key for value: `{}`", value)), - (Some(key), None) => Err(format!("no item for key: `{}`", key)), - (None, None) => Ok(log.into()), - }; - } -} - -#[cfg(test)] -mod tests { - use super::*; - use value::Kind; - - remap::test_type_def![ - value_noop { - expr: |_| ParseAwsVpcFlowLogFn::new(Box::new(Noop), None), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - - 
value_non_string { - expr: |_| ParseAwsVpcFlowLogFn::new(Literal::from(1).boxed(), None), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - - value_string { - expr: |_| ParseAwsVpcFlowLogFn::new(Literal::from("foo").boxed(), None), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - - format_non_string { - expr: |_| ParseAwsVpcFlowLogFn::new(Literal::from("foo").boxed(), Some(Literal::from(1).boxed())), - def: TypeDef { fallible: true, kind: Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - ]; - - #[test] - fn parse_aws_vpc_flow_log() { - // Examples from https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs-records-examples.html - let logs = vec![( - None, - vec![ - "2 123456789010 eni-1235b8ca123456789 172.31.16.139 172.31.16.21 20641 22 6 20 4249 1418530010 1418530070 ACCEPT OK", - "2 123456789010 eni-1235b8ca123456789 172.31.9.69 172.31.9.12 49761 3389 6 20 4249 1418530010 1418530070 REJECT OK", - "2 123456789010 eni-1235b8ca123456789 - - - - - - - 1431280876 1431280934 - NODATA", - "2 123456789010 eni-11111111aaaaaaaaa - - - - - - - 1431280876 1431280934 - SKIPDATA", - "2 123456789010 eni-1235b8ca123456789 203.0.113.12 172.31.16.139 0 0 1 4 336 1432917027 1432917142 ACCEPT OK", - "2 123456789010 eni-1235b8ca123456789 172.31.16.139 203.0.113.12 0 0 1 4 336 1432917094 1432917142 REJECT OK", - "2 123456789010 eni-1235b8ca123456789 2001:db8:1234:a100:8d6e:3477:df66:f105 2001:db8:1234:a102:3304:8879:34cf:4071 34892 22 6 54 8855 1477913708 1477913820 ACCEPT OK", - ] - ), ( - Some("version vpc_id subnet_id instance_id interface_id account_id type srcaddr dstaddr srcport dstport pkt_srcaddr pkt_dstaddr protocol bytes packets start end action tcp_flags log_status"), - vec![ - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 52.213.180.42 10.0.0.62 43416 5001 52.213.180.42 10.0.0.62 6 568 8 1566848875 
1566848933 ACCEPT 2 OK", - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 10.0.0.62 52.213.180.42 5001 43416 10.0.0.62 52.213.180.42 6 376 7 1566848875 1566848933 ACCEPT 18 OK", - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 52.213.180.42 10.0.0.62 43418 5001 52.213.180.42 10.0.0.62 6 100701 70 1566848875 1566848933 ACCEPT 2 OK", - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 10.0.0.62 52.213.180.42 5001 43418 10.0.0.62 52.213.180.42 6 632 12 1566848875 1566848933 ACCEPT 18 OK", - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 10.0.0.62 52.213.180.42 5001 43418 10.0.0.62 52.213.180.42 6 63388 1219 1566848933 1566849113 ACCEPT 1 OK", - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 52.213.180.42 10.0.0.62 43418 5001 52.213.180.42 10.0.0.62 6 23294588 15774 1566848933 1566849113 ACCEPT 1 OK", - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 52.213.180.42 10.0.0.62 43638 5001 52.213.180.42 10.0.0.62 6 1260 17 1566933133 1566933193 ACCEPT 3 OK", - "3 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-01234567890123456 eni-1235b8ca123456789 123456789010 IPv4 10.0.0.62 52.213.180.42 5001 43638 10.0.0.62 52.213.180.42 6 967 14 1566933133 1566933193 ACCEPT 19 OK", - ] - ), ( - Some("instance_id interface_id srcaddr dstaddr pkt_srcaddr pkt_dstaddr"), - vec![ - "- eni-1235b8ca123456789 10.0.1.5 10.0.0.220 10.0.1.5 203.0.113.5", - "- eni-1235b8ca123456789 10.0.0.220 203.0.113.5 10.0.0.220 203.0.113.5", - "- eni-1235b8ca123456789 203.0.113.5 10.0.0.220 203.0.113.5 10.0.0.220", - "- eni-1235b8ca123456789 10.0.0.220 10.0.1.5 203.0.113.5 10.0.1.5", - "i-01234567890123456 eni-1111aaaa2222bbbb3 10.0.1.5 
203.0.113.5 10.0.1.5 203.0.113.5", - "i-01234567890123456 eni-1111aaaa2222bbbb3 203.0.113.5 10.0.1.5 203.0.113.5 10.0.1.5", - ] - ), ( - Some("version interface_id account_id vpc_id subnet_id instance_id srcaddr dstaddr srcport dstport protocol tcp_flags type pkt_srcaddr pkt_dstaddr action log_status"), - vec![ - "3 eni-33333333333333333 123456789010 vpc-abcdefab012345678 subnet-22222222bbbbbbbbb i-01234567890123456 10.20.33.164 10.40.2.236 39812 80 6 3 IPv4 10.20.33.164 10.40.2.236 ACCEPT OK", - "3 eni-33333333333333333 123456789010 vpc-abcdefab012345678 subnet-22222222bbbbbbbbb i-01234567890123456 10.40.2.236 10.20.33.164 80 39812 6 19 IPv4 10.40.2.236 10.20.33.164 ACCEPT OK", - "3 eni-11111111111111111 123456789010 vpc-abcdefab012345678 subnet-11111111aaaaaaaaa - 10.40.1.175 10.40.2.236 39812 80 6 3 IPv4 10.20.33.164 10.40.2.236 ACCEPT OK", - "3 eni-22222222222222222 123456789010 vpc-abcdefab012345678 subnet-22222222bbbbbbbbb - 10.40.2.236 10.40.2.31 80 39812 6 19 IPv4 10.40.2.236 10.20.33.164 ACCEPT OK", - ] - )]; - - for (format, logs) in logs { - for log in logs { - assert!(parse_log(&log, format).is_ok()); - } - } - } -} diff --git a/lib/remap-functions/src/parse_syslog.rs b/lib/remap-functions/src/parse_syslog.rs deleted file mode 100644 index 5d4b4cfadf042..0000000000000 --- a/lib/remap-functions/src/parse_syslog.rs +++ /dev/null @@ -1,265 +0,0 @@ -use chrono::{DateTime, Datelike, Utc}; -use remap::prelude::*; -use remap::value::Kind; -use std::collections::BTreeMap; -use syslog_loose::{IncompleteDate, Message, ProcId}; - -#[derive(Clone, Copy, Debug)] -pub struct ParseSyslog; - -impl Function for ParseSyslog { - fn identifier(&self) -> &'static str { - "parse_syslog" - } - - fn parameters(&self) -> &'static [Parameter] { - &[Parameter { - keyword: "value", - accepts: |v| matches!(v, Value::Bytes(_)), - required: true, - }] - } - - fn compile(&self, mut arguments: ArgumentList) -> Result> { - let value = arguments.required("value")?.boxed(); - - 
Ok(Box::new(ParseSyslogFn { value })) - } -} - -#[derive(Debug, Clone)] -struct ParseSyslogFn { - value: Box, -} - -impl ParseSyslogFn { - #[cfg(test)] - fn new(value: Box) -> Self { - Self { value } - } -} - -/// Function used to resolve the year for syslog messages that don't include the -/// year. If the current month is January, and the syslog message is for -/// December, it will take the previous year. Otherwise, take the current year. -fn resolve_year((month, _date, _hour, _min, _sec): IncompleteDate) -> i32 { - let now = Utc::now(); - if now.month() == 1 && month == 12 { - now.year() - 1 - } else { - now.year() - } -} - -/// Create a Value::Map from the fields of the given syslog message. -fn message_to_value(message: Message<&str>) -> Value { - let mut result = BTreeMap::new(); - - result.insert("message".to_string(), message.msg.to_string().into()); - - if let Some(host) = message.hostname { - result.insert("hostname".to_string(), host.to_string().into()); - } - - if let Some(severity) = message.severity { - result.insert("severity".to_string(), severity.as_str().to_owned().into()); - } - - if let Some(facility) = message.facility { - result.insert("facility".to_string(), facility.as_str().to_owned().into()); - } - - if let Some(app_name) = message.appname { - result.insert("appname".to_string(), app_name.to_owned().into()); - } - - if let Some(msg_id) = message.msgid { - result.insert("msgid".to_string(), msg_id.to_owned().into()); - } - - if let Some(timestamp) = message.timestamp { - let timestamp: DateTime = timestamp.into(); - result.insert("timestamp".to_string(), timestamp.into()); - } - - if let Some(procid) = message.procid { - let value: Value = match procid { - ProcId::PID(pid) => pid.into(), - ProcId::Name(name) => name.to_string().into(), - }; - result.insert("procid".to_string(), value); - } - - for element in message.structured_data.into_iter() { - for (name, value) in element.params.into_iter() { - let key = format!("{}.{}", element.id, 
name); - result.insert(key, value.to_string().into()); - } - } - - result.into() -} - -impl Expression for ParseSyslogFn { - fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { - let bytes = self.value.execute(state, object)?.try_bytes()?; - let message = String::from_utf8_lossy(&bytes); - - let parsed = syslog_loose::parse_message_with_year_exact(&message, resolve_year)?; - - Ok(message_to_value(parsed)) - } - - fn type_def(&self, state: &state::Compiler) -> TypeDef { - self.value - .type_def(state) - .into_fallible(true) - .with_constraint(Kind::Map) - .with_inner_type(Some(inner_type_def())) - } -} - -fn inner_type_def() -> InnerTypeDef { - inner_type_def! ({ - "message": Kind::Bytes, - "hostname": Kind::Bytes | Kind::Null, - "severity": Kind::Bytes | Kind::Null, - "facility": Kind::Bytes | Kind::Null, - "appname": Kind::Bytes | Kind::Null, - "msgid": Kind::Bytes | Kind::Null, - "timestamp": Kind::Timestamp | Kind::Null, - "procid": Kind::Bytes | Kind::Integer | Kind::Null - }) -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::prelude::*; - use shared::btreemap; - - remap::test_type_def![ - value_string { - expr: |_| ParseSyslogFn { value: Literal::from("foo").boxed() }, - def: TypeDef { kind: Kind::Map, - fallible: true, - inner_type_def: Some(inner_type_def()), - }, - } - - value_non_string { - expr: |_| ParseSyslogFn { value: Literal::from(1).boxed() }, - def: TypeDef { fallible: true, - kind: Kind::Map, - inner_type_def: Some(inner_type_def()), - }, - } - - value_optional { - expr: |_| ParseSyslogFn { value: Box::new(Noop) }, - def: TypeDef { fallible: true, - kind: Kind::Map, - inner_type_def: Some(inner_type_def()), - }, - } - ]; - - remap::test_function![ - parse_syslog => ParseSyslog; - - valid { - args: func_args![value: r#"<13>1 2020-03-13T20:45:38.119Z dynamicwireless.name non 2426 ID931 [exampleSDID@32473 iut="3" eventSource= "Application" eventID="1011"] Try to override the THX port, maybe it will reboot 
the neural interface!"#], - want: Ok(btreemap! { - "severity" => "notice", - "facility" => "user", - "timestamp" => chrono::Utc.ymd(2020, 3, 13).and_hms_milli(20, 45, 38, 119), - "hostname" => "dynamicwireless.name", - "appname" => "non", - "procid" => 2426, - "msgid" => "ID931", - "exampleSDID@32473.iut" => "3", - "exampleSDID@32473.eventSource" => "Application", - "exampleSDID@32473.eventID" => "1011", - "message" => "Try to override the THX port, maybe it will reboot the neural interface!", - }) - } - - invalid { - args: func_args![value: "not much of a syslog message"], - want: Err("function call error: unable to parse input as valid syslog message".to_string()) - } - - haproxy { - args: func_args![value: r#"<133>Jun 13 16:33:35 haproxy[73411]: Proxy sticky-servers started."#], - want: Ok(btreemap! { - "facility" => "local0", - "severity" => "notice", - "message" => "Proxy sticky-servers started.", - "timestamp" => DateTime::::from(chrono::Local.ymd(Utc::now().year(), 6, 13).and_hms_milli(16, 33, 35, 0)), - "appname" => "haproxy", - "procid" => 73411, - }) - } - - missing_pri { - args: func_args![value: r#"Jun 13 16:33:35 haproxy[73411]: I am missing a pri."#], - want: Ok(btreemap! { - "message" => "I am missing a pri.", - "timestamp" => DateTime::::from(chrono::Local.ymd(Utc::now().year(), 6, 13).and_hms_milli(16, 33, 35, 0)), - "appname" => "haproxy", - "procid" => 73411, - }) - } - ]; - - #[test] - fn handles_empty_sd_element() { - fn there_is_map_called_empty(value: Value) -> Result { - match value { - Value::Map(map) => { - Ok(map.iter().find(|(key, _)| (&key[..]).starts_with("empty")) == None) - } - _ => Err("Result was not a map".into()), - } - } - - let mut state = state::Program::default(); - let mut object: Value = btreemap! 
{}.into(); - - let msg = format!( - r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - {} qwerty"#, - r#"[empty]"# - ); - - let query = ParseSyslogFn::new(Box::new(Literal::from(msg))); - let value = query.execute(&mut state, &mut object).unwrap(); - assert!(there_is_map_called_empty(value).unwrap()); - - let msg = format!( - r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - {} qwerty"#, - r#"[non_empty x="1"][empty]"# - ); - - let query = ParseSyslogFn::new(Box::new(Literal::from(msg))); - let value = query.execute(&mut state, &mut object).unwrap(); - assert!(there_is_map_called_empty(value).unwrap()); - - let msg = format!( - r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - {} qwerty"#, - r#"[empty][non_empty x="1"]"# - ); - - let query = ParseSyslogFn::new(Box::new(Literal::from(msg))); - let value = query.execute(&mut state, &mut object).unwrap(); - assert!(there_is_map_called_empty(value).unwrap()); - - let msg = format!( - r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - {} qwerty"#, - r#"[empty not_really="testing the test"]"# - ); - - let query = ParseSyslogFn::new(Box::new(Literal::from(msg))); - let value = query.execute(&mut state, &mut object).unwrap(); - assert!(!there_is_map_called_empty(value).unwrap()); - } -} diff --git a/lib/remap-functions/src/parse_url.rs b/lib/remap-functions/src/parse_url.rs deleted file mode 100644 index 484b954521ebe..0000000000000 --- a/lib/remap-functions/src/parse_url.rs +++ /dev/null @@ -1,173 +0,0 @@ -use remap::prelude::*; -use std::collections::BTreeMap; -use std::iter::FromIterator; -use url::Url; -use value::Kind; - -#[derive(Clone, Copy, Debug)] -pub struct ParseUrl; - -impl Function for ParseUrl { - fn identifier(&self) -> &'static str { - "parse_url" - } - - fn parameters(&self) -> &'static [Parameter] { - &[Parameter { - keyword: "value", - accepts: |v| matches!(v, Value::Bytes(_)), - required: true, - }] - } - - fn compile(&self, mut arguments: ArgumentList) -> Result> { 
- let value = arguments.required("value")?.boxed(); - - Ok(Box::new(ParseUrlFn { value })) - } -} - -#[derive(Debug, Clone)] -struct ParseUrlFn { - value: Box, -} - -impl ParseUrlFn { - #[cfg(test)] - fn new(value: Box) -> Self { - Self { value } - } -} - -impl Expression for ParseUrlFn { - fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { - let value = self.value.execute(state, object)?; - let string = value.try_bytes_utf8_lossy()?; - - Url::parse(&string) - .map_err(|e| format!("unable to parse url: {}", e).into()) - .map(url_to_value) - } - - fn type_def(&self, state: &state::Compiler) -> TypeDef { - self.value - .type_def(state) - .into_fallible(true) // URL parsing error - .with_inner_type(Some(inner_type_def())) - .with_constraint(value::Kind::Map) - } -} - -/// The type defs of the fields contained by the returned map. -fn inner_type_def() -> InnerTypeDef { - inner_type_def! ({ - "scheme": Kind::Bytes, - "username": Kind::Bytes, - "password": Kind::Bytes, - "path": Kind::Bytes | Kind::Null, - "host": Kind::Bytes, - "port": Kind::Bytes, - "fragment": Kind::Bytes | Kind::Null, - "query": Kind::Map, - }) -} - -fn url_to_value(url: Url) -> Value { - let mut map = BTreeMap::<&str, Value>::new(); - - map.insert("scheme", url.scheme().to_owned().into()); - map.insert("username", url.username().to_owned().into()); - map.insert( - "password", - url.password() - .map(ToOwned::to_owned) - .unwrap_or_default() - .into(), - ); - map.insert("path", url.path().to_owned().into()); - map.insert("host", url.host_str().map(ToOwned::to_owned).into()); - map.insert("port", url.port().map(|v| v as i64).into()); - map.insert("fragment", url.fragment().map(ToOwned::to_owned).into()); - map.insert( - "query", - url.query_pairs() - .into_owned() - .map(|(k, v)| (k, v.into())) - .collect::>() - .into(), - ); - - Value::from_iter(map.into_iter().map(|(k, v)| (k.to_owned(), v))) -} - -#[cfg(test)] -mod tests { - use super::*; - use shared::btreemap; - - 
remap::test_type_def![ - value_string { - expr: |_| ParseUrlFn { value: Literal::from("foo").boxed() }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - - value_optional { - expr: |_| ParseUrlFn { value: Box::new(Noop) }, - def: TypeDef { fallible: true, kind: value::Kind::Map, inner_type_def: Some(inner_type_def()) }, - } - ]; - - #[test] - fn parse_url() { - let cases = vec![ - ( - btreemap! {}, - Ok(btreemap! { - "scheme" => "https", - "username" => "", - "password" => "", - "host" => "vector.dev", - "port" => Value::Null, - "path" => "/", - "query" => btreemap!{}, - "fragment" => Value::Null, - } - .into()), - ParseUrlFn::new(Box::new(Literal::from("https://vector.dev"))), - ), - ( - btreemap! {}, - Ok(btreemap! { - "scheme" => "ftp", - "username" => "foo", - "password" => "bar", - "host" => "vector.dev", - "port" => 4343, - "path" => "/foobar", - "query" => btreemap!{ "hello" => "world" }, - "fragment" => "123", - } - .into()), - ParseUrlFn::new(Box::new(Literal::from( - "ftp://foo:bar@vector.dev:4343/foobar?hello=world#123", - ))), - ), - ( - btreemap! 
{}, - Err("function call error: unable to parse url: relative URL without a base".into()), - ParseUrlFn::new(Box::new(Literal::from("INVALID"))), - ), - ]; - - let mut state = state::Program::default(); - - for (object, exp, func) in cases { - let mut object: Value = object.into(); - let got = func - .execute(&mut state, &mut object) - .map_err(|e| format!("{:#}", anyhow::anyhow!(e))); - - assert_eq!(got, exp); - } - } -} diff --git a/lib/remap-functions/src/to_int.rs b/lib/remap-functions/src/to_int.rs deleted file mode 100644 index de099d4b1f029..0000000000000 --- a/lib/remap-functions/src/to_int.rs +++ /dev/null @@ -1,170 +0,0 @@ -use remap::prelude::*; -use shared::conversion::Conversion; - -#[derive(Clone, Copy, Debug)] -pub struct ToInt; - -impl Function for ToInt { - fn identifier(&self) -> &'static str { - "to_int" - } - - fn parameters(&self) -> &'static [Parameter] { - &[Parameter { - keyword: "value", - accepts: |v| { - matches!( - v, - Value::Integer(_) - | Value::Float(_) - | Value::Bytes(_) - | Value::Boolean(_) - | Value::Timestamp(_) - | Value::Null - ) - }, - required: true, - }] - } - - fn compile(&self, mut arguments: ArgumentList) -> Result> { - let value = arguments.required("value")?.boxed(); - - Ok(Box::new(ToIntFn { value })) - } -} - -#[derive(Debug, Clone)] -struct ToIntFn { - value: Box, -} - -impl ToIntFn { - #[cfg(test)] - fn new(value: Box) -> Self { - Self { value } - } -} - -impl Expression for ToIntFn { - fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { - use Value::*; - - let value = self.value.execute(state, object)?; - - match value { - Integer(_) => Ok(value), - Float(v) => Ok(Integer(v as i64)), - Boolean(v) => Ok(Integer(if v { 1 } else { 0 })), - Null => Ok(0.into()), - Bytes(v) => Conversion::Integer - .convert(v) - .map_err(|e| e.to_string().into()), - Timestamp(v) => Ok(v.timestamp().into()), - Array(_) | Map(_) | Regex(_) => Err("unable to convert value to integer".into()), - } - } - - fn 
type_def(&self, state: &state::Compiler) -> TypeDef { - use value::Kind; - - self.value - .type_def(state) - .fallible_unless( - Kind::Integer - | Kind::Float - | Kind::Bytes - | Kind::Boolean - | Kind::Timestamp - | Kind::Null, - ) - .with_constraint(Kind::Integer) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::{DateTime, Utc}; - use value::Kind; - - remap::test_type_def![ - boolean_infallible { - expr: |_| ToIntFn { value: lit!(true).boxed() }, - def: TypeDef { kind: Kind::Integer, ..Default::default() }, - } - - integer_infallible { - expr: |_| ToIntFn { value: lit!(1).boxed() }, - def: TypeDef { kind: Kind::Integer, ..Default::default() }, - } - - float_infallible { - expr: |_| ToIntFn { value: lit!(1.0).boxed() }, - def: TypeDef { kind: Kind::Integer, ..Default::default() }, - } - - null_infallible { - expr: |_| ToIntFn { value: lit!(null).boxed() }, - def: TypeDef { kind: Kind::Integer, ..Default::default() }, - } - - string_fallible { - expr: |_| ToIntFn { value: lit!("foo").boxed() }, - def: TypeDef { kind: Kind::Integer, ..Default::default() }, - } - - map_fallible { - expr: |_| ToIntFn { value: map!{}.boxed() }, - def: TypeDef { fallible: true, kind: Kind::Integer, ..Default::default() }, - } - - array_fallible { - expr: |_| ToIntFn { value: array![].boxed() }, - def: TypeDef { fallible: true, kind: Kind::Integer, ..Default::default() }, - } - - timestamp_infallible { - expr: |_| ToIntFn { value: Literal::from(chrono::Utc::now()).boxed() }, - def: TypeDef { kind: Kind::Integer, ..Default::default() }, - } - ]; - - #[test] - fn to_int() { - use shared::btreemap; - - let cases = vec![ - ( - btreemap! { "foo" => "20" }, - Ok(Value::Integer(20)), - ToIntFn::new(Box::new(Path::from("foo"))), - ), - ( - btreemap! { "foo" => 20.5 }, - Ok(Value::Integer(20)), - ToIntFn::new(Box::new(Path::from("foo"))), - ), - ( - btreemap! 
{ - "foo" => DateTime::parse_from_rfc2822("Wed, 16 Oct 2019 12:00:00 +0000") - .unwrap() - .with_timezone(&Utc), - }, - Ok(Value::Integer(1571227200)), - ToIntFn::new(Box::new(Path::from("foo"))), - ), - ]; - - let mut state = state::Program::default(); - - for (object, exp, func) in cases { - let mut object: Value = object.into(); - let got = func - .execute(&mut state, &mut object) - .map_err(|e| format!("{:#}", anyhow::anyhow!(e))); - - assert_eq!(got, exp); - } - } -} diff --git a/lib/remap-functions/src/to_timestamp.rs b/lib/remap-functions/src/to_timestamp.rs deleted file mode 100644 index fa8ed9ef35a4a..0000000000000 --- a/lib/remap-functions/src/to_timestamp.rs +++ /dev/null @@ -1,162 +0,0 @@ -use chrono::{TimeZone, Utc}; -use remap::prelude::*; -use shared::conversion::Conversion; - -#[derive(Clone, Copy, Debug)] -pub struct ToTimestamp; - -impl Function for ToTimestamp { - fn identifier(&self) -> &'static str { - "to_timestamp" - } - - fn parameters(&self) -> &'static [Parameter] { - &[Parameter { - keyword: "value", - accepts: |v| { - matches!( - v, - Value::Integer(_) | Value::Float(_) | Value::Bytes(_) | Value::Timestamp(_) - ) - }, - required: true, - }] - } - - fn compile(&self, mut arguments: ArgumentList) -> Result> { - let value = arguments.required("value")?.boxed(); - - Ok(Box::new(ToTimestampFn { value })) - } -} - -#[derive(Debug, Clone)] -struct ToTimestampFn { - value: Box, -} - -impl ToTimestampFn { - #[cfg(test)] - fn new(value: Box) -> Self { - Self { value } - } -} - -impl Expression for ToTimestampFn { - fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { - use Value::*; - - let value = self.value.execute(state, object)?; - - match value { - Timestamp(_) => Ok(value), - Integer(v) => Ok(Timestamp(Utc.timestamp(v, 0))), - Float(v) => Ok(Timestamp(Utc.timestamp(v.round() as i64, 0))), - Bytes(v) => Conversion::Timestamp - .convert(v) - .map_err(|e| e.to_string().into()), - Boolean(_) | Array(_) | Map(_) | 
Regex(_) | Null => { - Err("unable to convert value to timestamp".into()) - } - } - } - - fn type_def(&self, state: &state::Compiler) -> TypeDef { - use value::Kind; - - self.value - .type_def(state) - .fallible_unless(Kind::Timestamp | Kind::Integer | Kind::Float) - .with_constraint(Kind::Timestamp) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use value::Kind; - - remap::test_type_def![ - timestamp_infallible { - expr: |_| ToTimestampFn { value: Literal::from(chrono::Utc::now()).boxed() }, - def: TypeDef { kind: Kind::Timestamp, ..Default::default() }, - } - - integer_infallible { - expr: |_| ToTimestampFn { value: lit!(1).boxed() }, - def: TypeDef { kind: Kind::Timestamp, ..Default::default() }, - } - - float_infallible { - expr: |_| ToTimestampFn { value: lit!(1.0).boxed() }, - def: TypeDef { kind: Kind::Timestamp, ..Default::default() }, - } - - null_fallible { - expr: |_| ToTimestampFn { value: lit!(null).boxed() }, - def: TypeDef { - fallible: true, - kind: Kind::Timestamp, - ..Default::default() - }, - } - - string_fallible { - expr: |_| ToTimestampFn { value: lit!("foo").boxed() }, - def: TypeDef { - fallible: true, - kind: Kind::Timestamp, - ..Default::default() - }, - } - - map_fallible { - expr: |_| ToTimestampFn { value: map!{}.boxed() }, - def: TypeDef { - fallible: true, - kind: Kind::Timestamp, - ..Default::default() - }, - } - - array_fallible { - expr: |_| ToTimestampFn { value: array![].boxed() }, - def: TypeDef { - fallible: true, - kind: Kind::Timestamp, - ..Default::default() - }, - } - - boolean_fallible { - expr: |_| ToTimestampFn { value: lit!(true).boxed() }, - def: TypeDef { - fallible: true, - kind: Kind::Timestamp, - ..Default::default() - }, - } - ]; - - #[test] - fn to_timestamp() { - use shared::btreemap; - - let cases = vec![( - btreemap! 
{ "foo" => Utc.timestamp(10, 0) }, - Ok(Value::Timestamp(Utc.timestamp(10, 0))), - ToTimestampFn::new(Box::new(Path::from("foo"))), - )]; - - let mut state = state::Program::default(); - - for (object, exp, func) in cases { - let mut object: Value = object.into(); - let got = func - .execute(&mut state, &mut object) - .map_err(|e| format!("{:#}", anyhow::anyhow!(e))); - - assert_eq!(got, exp); - } - } -} diff --git a/lib/remap-lang/src/expression/assignment.rs b/lib/remap-lang/src/expression/assignment.rs deleted file mode 100644 index 98344d6f77b2a..0000000000000 --- a/lib/remap-lang/src/expression/assignment.rs +++ /dev/null @@ -1,325 +0,0 @@ -use super::Error as E; -use crate::{ - expression::{Path, Variable}, - path, state, - value::Kind, - Expr, Expression, Field, InnerTypeDef, Object, Result, Segment, TypeDef, Value, -}; -use std::fmt; -use std::str::FromStr; - -#[derive(thiserror::Error, Clone, Debug, PartialEq)] -pub enum Error { - #[error("unable to insert value in path: {0}")] - PathInsertion(String), -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Target { - Path(Path), - Variable(Variable), - Infallible { ok: Box, err: Box }, -} - -impl fmt::Display for Target { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Target::Path(path) => path.fmt(f), - Target::Variable(var) => var.fmt(f), - Target::Infallible { ok, err } => { - ok.as_ref().fmt(f)?; - f.write_str(", ")?; - err.as_ref().fmt(f) - } - } - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct Assignment { - target: Target, - value: Box, -} - -/// Add the type def for this path to the compiler state. -/// We recurse down any inner typedefs and add those paths to the state too. -fn path_type_def(state: &mut state::Compiler, path: &path::Path, type_def: TypeDef) { - let query_types = state.path_query_types_mut(); - - // Remove any current typedefs that start with this path. 
- query_types.retain(|key, _| !key.starts_with(&path)); - - // Insert the current path type def. - query_types.insert(path.clone(), type_def.clone()); - - // Recursively insert new ones from the inner type def. - // Note we are not handling Array inner types, since array indexing - // is fallible (there may not be enough elements in the array) any - // indexing needs to be handled. - // This may change in future. - if let Some(InnerTypeDef::Map(map)) = type_def.inner_type_def { - for (field, type_def) in map { - if let Ok(field) = Field::from_str(&field) { - let mut path = path.clone(); - path.append(Segment::Field(field)); - path_type_def(state, &path, type_def); - } - } - } -} - -impl Assignment { - pub fn new(target: Target, value: Box, state: &mut state::Compiler) -> Self { - let type_def = value.type_def(state); - - let var_type_def = |state: &mut state::Compiler, var: &Variable, type_def| { - state - .variable_types_mut() - .insert(var.ident().to_owned(), type_def); - }; - - match &target { - Target::Variable(var) => var_type_def(state, var, type_def), - Target::Path(path) => path_type_def(state, path.as_ref(), type_def), - Target::Infallible { ok, err } => { - // If the type definition of the rhs expression is infallible, - // then an infallible assignment is redundant. - // - // This invariant is upheld (for now) by the parser. - assert!(type_def.is_fallible()); - - // "ok" target takes on the type definition of the value, but is - // set to being infallible, as the error will be captured by the - // "err" target. - let type_def = type_def.into_fallible(false); - - match ok.as_ref() { - Target::Variable(var) => var_type_def(state, var, type_def), - Target::Path(path) => path_type_def(state, path.as_ref(), type_def), - Target::Infallible { .. } => unimplemented!("nested infallible target"), - } - - // "err" target is assigned `null` or a string containing the - // error message. 
- let err_type_def = TypeDef { - kind: Kind::Bytes | Kind::Null, - ..Default::default() - }; - - match err.as_ref() { - Target::Variable(var) => var_type_def(state, var, err_type_def), - Target::Path(path) => path_type_def(state, path.as_ref(), err_type_def), - Target::Infallible { .. } => unimplemented!("nested infallible target"), - } - } - } - - Self { target, value } - } -} - -impl Expression for Assignment { - fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result { - let value = self.value.execute(state, object); - - // ignoring the unnecessariy wrap as this whole parser is going away momentarily and this - // matches up better with the other *_assignment methods anyway - #[allow(clippy::unnecessary_wraps)] - fn var_assignment<'a>( - state: &mut state::Program, - var: &Variable, - value: &'a Value, - ) -> Result<&'a Value> { - state - .variables_mut() - .insert(var.ident().to_owned(), value.to_owned()); - - Ok(value) - } - - fn path_assignment<'a>( - object: &mut dyn Object, - path: &Path, - value: &'a Value, - ) -> Result<&'a Value> { - object - .insert(path.as_ref(), value.to_owned()) - .map_err(|e| E::Assignment(Error::PathInsertion(e)))?; - - Ok(value) - } - - match &self.target { - Target::Variable(var) => var_assignment(state, var, &value?).map(ToOwned::to_owned), - Target::Path(path) => path_assignment(object, path, &value?).map(ToOwned::to_owned), - Target::Infallible { ok, err } => { - let (ok_value, err_value) = match value { - Ok(value) => (value, Value::Null), - Err(err) => (Value::Null, Value::from(err)), - }; - - match ok.as_ref() { - Target::Variable(var) => var_assignment(state, var, &ok_value)?, - Target::Path(path) => path_assignment(object, path, &ok_value)?, - Target::Infallible { .. 
} => unimplemented!("nested infallible target"), - }; - - match err.as_ref() { - Target::Variable(var) => var_assignment(state, var, &err_value)?, - Target::Path(path) => path_assignment(object, path, &err_value)?, - Target::Infallible { .. } => unimplemented!("nested infallible target"), - }; - - if err_value.is_null() { - Ok(ok_value) - } else { - Ok(err_value) - } - } - } - } - - fn type_def(&self, state: &state::Compiler) -> TypeDef { - let var_type_def = |var: &Variable| { - state - .variable_type(var.ident().to_owned()) - .cloned() - .expect("variable must be assigned via Assignment::new") - }; - - let path_type_def = |path: &Path| { - state - .path_query_type(path) - .cloned() - .expect("path must be assigned via Assignment::new") - }; - - match &self.target { - Target::Variable(var) => var_type_def(var), - Target::Path(path) => path_type_def(path), - Target::Infallible { ok, err } => { - let ok_type_def = match ok.as_ref() { - Target::Variable(var) => var_type_def(var), - Target::Path(path) => path_type_def(path), - Target::Infallible { .. } => unimplemented!("nested infallible target"), - }; - - // Technically the parser rejects this invariant, because an - // expression that is known to be infallible cannot be assigned - // to an infallible target, since the error will always be - // `null`. - if !ok_type_def.is_fallible() { - return ok_type_def; - } - - let err_type_def = match err.as_ref() { - Target::Variable(var) => var_type_def(var), - Target::Path(path) => path_type_def(path), - Target::Infallible { .. } => unimplemented!("nested infallible target"), - }; - - ok_type_def.merge(err_type_def).into_fallible(false) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - expression::{Arithmetic, Literal}, - lit, path, test_type_def, Operator, - }; - - #[test] - fn path_typedef_added_to_state() { - let mut state = state::Compiler::default(); - - // Assign a type with inner type to a path. 
- let path = path::Path::from_str(".ook").unwrap(); - path_type_def( - &mut state, - &path, - TypeDef::from(Kind::Map) - .with_inner_type(Some(crate::inner_type_def!({ "flork": Kind::Integer }))), - ); - - assert_eq!( - Some(Kind::Map), - state - .path_query_type(Box::new(path)) - .map(|typedef| typedef.kind) - ); - - let nested_path = path::Path::from_str(".ook.flork").unwrap(); - - assert_eq!( - Some(Kind::Integer), - state - .path_query_type(Box::new(nested_path.clone())) - .map(|typedef| typedef.kind) - ); - - // Assign a different type to this path. - let path = path::Path::from_str(".ook").unwrap(); - path_type_def(&mut state, &path, Kind::Bytes.into()); - - // Ensure the inner type is no longer defined. - assert_eq!( - None, - state - .path_query_type(Box::new(nested_path)) - .map(|typedef| typedef.kind) - ); - } - - test_type_def![ - variable { - expr: |state: &mut state::Compiler| { - let target = Target::Variable(Variable::new("foo".to_owned(), None)); - let value = Box::new(Literal::from(true).into()); - - Assignment::new(target, value, state) - }, - def: TypeDef { - kind: Kind::Boolean, - ..Default::default() - }, - } - - path { - expr: |state: &mut state::Compiler| { - let target = Target::Path(Path::from("foo")); - let value = Box::new(Literal::from("foo").into()); - - Assignment::new(target, value, state) - }, - def: TypeDef { - kind: Kind::Bytes, - ..Default::default() - }, - } - - infallible { - expr: |state: &mut state::Compiler| { - let ok = Box::new(Target::Variable(Variable::new("ok".to_owned(), None))); - let err = Box::new(Target::Variable(Variable::new("err".to_owned(), None))); - - let target = Target::Infallible { ok, err }; - let value = Box::new(Arithmetic::new( - Box::new(lit!(true).into()), - Box::new(lit!(3).into()), - Operator::Multiply, - ).into()); - - Assignment::new(target, value, state) - }, - def: TypeDef { - fallible: false, - kind: Kind::Bytes | Kind::Integer | Kind::Float, - ..Default::default() - }, - } - ]; -} diff --git 
a/lib/remap-lang/src/parser.rs b/lib/remap-lang/src/parser.rs deleted file mode 100644 index d2444c40bdc94..0000000000000 --- a/lib/remap-lang/src/parser.rs +++ /dev/null @@ -1,1389 +0,0 @@ -#![allow(clippy::or_fun_call)] - -use crate::{ - diagnostic::{self, Diagnostic, DiagnosticList, Label, Note, Span}, - expression::{ - self, function, if_statement::IfCondition, Arithmetic, Array, Assignment, Block, Function, - IfStatement, Literal, Map, Noop, Not, Path, Target, Variable, - }, - path, state, Expr, Expression, Function as Fn, Operator, Value, -}; -use pest::error::InputLocation; -use pest::iterators::{Pair, Pairs}; -use regex::{Regex, RegexBuilder}; -use std::collections::BTreeMap; -use std::ops::{Deref, DerefMut}; -use std::str::FromStr; - -pub(crate) type R = Rule; -type IResult = Result, ParserBug>; -type PestError = pest::error::Error; - -#[derive(pest_derive::Parser)] -#[grammar = "../grammar.pest"] -pub(super) struct Parser<'a> { - pub function_definitions: &'a [Box], - pub allow_regex_return: bool, - pub compiler_state: &'a mut state::Compiler, - - /// This field keeps track of the *recoverable* errors the parser - /// encountered while parsing a program source. - /// - /// If the parser can continue after an error occurs, it is collected in - /// this field. If a *fatal* error is encountered, the parses tries to move - /// on to the next expression and records the failed expression as a - /// `ParserBug`. - /// - /// The field is added to the `ParseError` return value once parsing is - /// finished. - /// - /// All parsing functions take `self` such that this state cannot leak into - /// subsequent parsing calls. 
- diagnostics: DiagnosticList, -} - -impl<'a> From<&Pair<'a, R>> for Span { - fn from(pair: &Pair) -> Self { - pair.as_span().into() - } -} - -impl From> for Span { - fn from(span: pest::Span) -> Self { - (span.start()..span.end()).into() - } -} - -impl From for Diagnostic { - fn from(err: PestError) -> Self { - let msg = "syntax error"; - - let span = match err.location { - InputLocation::Pos(start) => start..start, - InputLocation::Span((start, end)) => start..end, - }; - - let label = Label::primary("invalid token", span.clone()); - - match err.variant { - pest::error::ErrorVariant::ParsingError { - positives, - negatives, - } => { - let expected = if positives.len() == 1 { - Label::context(format!("expected: {}", positives[0]), span.clone()) - } else { - Label::context( - format!( - "expected one of: {}", - positives - .iter() - .map(|r| r.to_string()) - .collect::>() - .join(", ") - ), - span.clone(), - ) - }; - - let unexpected = Label::primary( - format!( - "unexpected: {}", - negatives - .iter() - .map(|r| r.to_string()) - .collect::>() - .join(", ") - ), - span.clone(), - ); - - match () { - _ if !positives.is_empty() => Diagnostic::error(msg) - .with_label(label) - .with_label(expected), - _ if !negatives.is_empty() => Diagnostic::error(msg) - .with_label(label) - .with_label(unexpected), - _ => Diagnostic::error(msg).with_primary("unexpected token", span), - } - } - pest::error::ErrorVariant::CustomError { message } => { - Diagnostic::error(msg).with_primary(message, span) - } - } - } -} - -// ----------------------------------------------------------------------------- - -/// A non-recoverable error raised by the parser. -/// -/// The parser tries to recover the next expression it tries to parse, if -/// possible. 
-#[derive(Debug, Clone)] -pub(crate) struct ParserBug(Span, R); - -impl From for Diagnostic { - fn from(err: ParserBug) -> Self { - Diagnostic::bug("unexpected token").with_primary(err.1.to_string(), err.0) - } -} - -// ----------------------------------------------------------------------------- - -/// A container type that wraps an [`Expression`] and adds a span pointing to -/// the expression position within the parsed source. -#[derive(Clone, Debug, PartialEq)] -pub struct ParsedExpression { - span: Span, - expr: Expr, -} - -impl ParsedExpression { - pub fn span(&self) -> Span { - self.span - } -} - -impl Deref for ParsedExpression { - type Target = Expr; - - fn deref(&self) -> &Self::Target { - &self.expr - } -} - -impl DerefMut for ParsedExpression { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.expr - } -} - -impl> From> for ParsedExpression { - fn from(node: ParsedNode) -> Self { - Self { - span: node.span, - expr: node.inner.into(), - } - } -} - -// ----------------------------------------------------------------------------- - -/// Similar to [`ParsedExpression`] except that it is private, generic over `T` -/// and has an expanded API used within the parser. 
-#[derive(Clone, Debug, PartialEq)] -struct ParsedNode { - span: Span, - inner: T, -} - -impl Deref for ParsedNode { - type Target = T; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -impl DerefMut for ParsedNode { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.inner - } -} - -impl ParsedNode { - fn into_inner(self) -> T { - self.inner - } - - fn take(self) -> (Span, T) { - (self.span, self.inner) - } - - #[allow(clippy::wrong_self_convention)] - fn to_expr(self) -> ParsedNode - where - T: Into, - { - let (span, inner) = self.take(); - - ParsedNode { - span, - inner: inner.into(), - } - } -} - -impl, S: Into> From<(S, U)> for ParsedNode { - fn from((span, node): (S, U)) -> Self { - Self { - span: span.into(), - inner: node.into(), - } - } -} - -// ----------------------------------------------------------------------------- - -// Auto-generate a set of parser functions to parse different operations. -macro_rules! operation_fns { - (@impl $($rule:tt => { op: [$head_op:path, $($tail_op:path),+ $(,)?], next: $next:tt, })+) => ( - $( - paste::paste! 
{ - fn [<$rule _from_pair>](&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let mut pairs = pair.into_inner(); - - let next = pairs.next().ok_or(e(R::$rule, span))?; - let (span, mut lhs) = self.[<$next _from_pair>](next)?.take(); - let mut op = Operator::$head_op; - - for pair in pairs { - match pair.as_rule() { - R::[] => { - op = Operator::from_str(pair.as_str()).map_err(|_| e(R::$rule, span))?; - } - _ => { - lhs = Expr::from(Arithmetic::new( - Box::new(lhs), - Box::new(self.[<$next _from_pair>](pair)?.into_inner()), - op.clone(), - )); - } - } - } - - Ok((span, lhs).into()) - } - } - )+ - ); - - ($($rule:tt => { op: [$($op:path),+ $(,)?], next: $next:tt, })+) => ( - operation_fns!(@impl $($rule => { op: [$($op),+], next: $next, })+); - ); -} - -impl<'a> Parser<'a> { - pub fn new( - function_definitions: &'a [Box], - compiler_state: &'a mut state::Compiler, - allow_regex_return: bool, - ) -> Self { - Self { - function_definitions, - allow_regex_return, - compiler_state, - diagnostics: DiagnosticList::default(), - } - } - - /// Parse a source string into a valid [`Program`]. - pub(crate) fn program_from_str( - mut self, - source: &'a str, - ) -> diagnostic::Result> { - let expressions = self - .pairs_from_str(R::program, source) - .and_then(|pairs| self.pairs_to_expressions(pairs.into_inner())) - .map(|node| node.into_inner()) - .map(|nodes| nodes.into_iter().map(Into::into).collect::>()) - .map_err(|err| self.diagnostics.push(err.into())); - - match expressions { - Err(_) => Err(self.diagnostics), - Ok(_) if self.diagnostics.is_err() => Err(self.diagnostics), - Ok(expressions) => Ok((expressions, self.diagnostics)), - } - } - - /// Parse a string path into a [`path::Path`] wrapper with easy access to - /// individual path [`path::Segment`]s. 
- pub(crate) fn path_from_str(mut self, path: &'a str) -> diagnostic::Result { - let path = self - .pairs_from_str(R::rule_path, path) - .and_then(|pairs| pairs.into_inner().next().ok_or(e(R::rule_path, path))) - .and_then(|pair| self.path_from_pair(pair)) - .map(|node| node.into_inner()) - .map_err(|err| self.diagnostics.push(err.into())); - - match path { - Err(_) => Err(self.diagnostics), - Ok(_) if self.diagnostics.is_err() => Err(self.diagnostics), - Ok(path) => Ok((path, self.diagnostics)), - } - } - - /// Parse a string into a [`path::Field`] wrapper. - /// - /// Depending on the provided string, this can result in three outcomes: - /// - /// - A `Field::Regular` if the string is a valid "identifier". - /// - A `Field::Quoted` if the string is a valid "quoted string". - /// - An error if neither is true. - /// - /// These rules are defined by the Remap parser. - pub(crate) fn path_field_from_str(mut self, field: &'a str) -> diagnostic::Result { - let field = self - .pairs_from_str(R::rule_ident, field) - .and_then(|pairs| pairs.into_inner().next().ok_or(e(R::rule_ident, field))) - .map(|pair| path::Field::Regular(pair.as_str().to_owned())) - .or_else(|_| { - self.diagnostics.clear(); - - self.pairs_from_str(R::rule_string_inner, field) - .map(|node| node.into_inner()) - .and_then(|mut pairs| pairs.next().ok_or(e(R::rule_string_inner, field))) - .map(|pair| path::Field::Quoted(pair.as_str().to_owned())) - }) - .map_err(|err| self.diagnostics.push(err.into())); - - match field { - Err(_) => Err(self.diagnostics), - Ok(_) if self.diagnostics.is_err() => Err(self.diagnostics), - Ok(field) => Ok((field, self.diagnostics)), - } - } - - /// Converts the set of known "root" rules into boxed [`Expression`] trait - /// objects. 
- fn pairs_to_expressions(&mut self, pairs: Pairs<'a, R>) -> IResult>> { - let mut nodes = vec![]; - - for pair in pairs { - match pair.as_rule() { - R::assignment | R::boolean_expr | R::block | R::if_statement => { - nodes.push(self.expression_from_pair(pair)?) - } - R::EOI => (), - _ => return Err(e(R::expression, &pair)), - } - } - - if let Some(node) = nodes.last() { - let td = node.type_def(&self.compiler_state); - - if !self.allow_regex_return && !td.kind.is_all() && td.scalar_kind().contains_regex() { - self.diagnostics.push( - Diagnostic::error("invalid return value") - .with_primary("regex return value not allowed", node.span), - ); - } - } - - let start = nodes.first().map(|n| n.span.start).unwrap_or_default(); - let end = nodes.last().map(|n| n.span.end).unwrap_or_default(); - - Ok((start..end, nodes).into()) - } - - // ignoring the unnecessariy wrap as this whole parser is going away momentarily and this - // matches up better with the other *_from_* methods anyway - #[allow(clippy::unnecessary_wraps)] - fn pairs_from_str<'b>(&mut self, rule: R, source: &'b str) -> IResult> { - use pest::Parser; - - let span = Span::from(source); - let pairs = match Self::parse(rule, source) { - Ok(pairs) => pairs, - Err(err) => { - self.diagnostics.push(err.into()); - pest::state("", Ok).unwrap() - } - }; - - Ok((span, pairs).into()) - } - - /// Given a `Pair`, build a boxed [`Expression`] trait object from it. 
- fn expression_from_pair(&mut self, pair: Pair) -> IResult { - match pair.as_rule() { - R::assignment => self.assignment_from_pair(pair), - R::boolean_expr => self.boolean_expr_from_pair(pair), - R::block => self.block_from_pair(pair), - R::if_statement => self.if_statement_from_pair(pair), - _ => Err(e(R::expression, &pair)), - } - } - - fn assignment_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let mut pairs = pair.into_inner(); - - let (target_span, mut target) = self - .target_from_pair(pairs.next().ok_or(e(R::assignment, span))?)? - .take(); - let (expression_span, expression) = self - .expression_from_pair(pairs.next().ok_or(e(R::assignment, span))?)? - .take(); - - let assignment_span = target_span.start..expression_span.end; - - if let Target::Infallible { ok, err } = &target { - if !expression.type_def(&self.compiler_state).is_fallible() { - self.diagnostics.push( - Diagnostic::error("unnecessary error assignment") - .with_primary("this error assignment is unnecessary", target_span) - .with_context("because this expression cannot fail", expression_span) - .with_note(Note::InfallibleAssignment { - ok: ok.to_string(), - err: err.to_string(), - }), - ); - - target = Target::Variable(Variable::new("_".to_owned(), None)); - } - } - - let assignment = Assignment::new(target, Box::new(expression), &mut self.compiler_state); - - Ok((assignment_span, assignment).into()) - } - - /// Return the target type to which a value is being assigned. - /// - /// This can either return a `variable` or a `target_path` target, depending - /// on the parser rule being processed. - fn target_from_pair(&mut self, pair: Pair) -> IResult { - match pair.as_rule() { - R::variable => self.variable_from_pair(pair).map(|node| { - let (span, mut variable) = node.take(); - - // track an error diagnostic and re-assign variable to a - // variable without a path, since we don't support this - // currently. 
- if let Some(path) = variable.path() { - let path_span = span.end - path.to_string().bytes().len()..span.end; - let variable_span = span.start..path_span.start; - - self.diagnostics.push( - Diagnostic::error("path-based variable assignment") - .with_primary( - "assignment to variable path currently unsupported", - path_span, - ) - .with_context( - format!(r#"assign to "{}" instead"#, variable.ident()), - variable_span, - ), - ); - - variable = Variable::new("_".to_owned(), None); - } - - (span, Target::Variable(variable)).into() - }), - R::path => { - let (span, path) = self.path_from_pair(pair)?.take(); - Ok((span, Target::Path(Path::new(path))).into()) - } - R::target_infallible => self - .target_infallible_from_pair(pair) - .map(|node| node.take().into()), - _ => Err(e(R::target, &pair)), - } - } - - fn target_infallible_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let mut pairs = pair.into_inner(); - - let (ok_span, ok) = pairs - .next() - .ok_or(e(R::target_infallible, span)) - .and_then(|pair| Ok(self.target_from_pair(pair)?))? - .take(); - - let (err_span, err) = pairs - .next() - .ok_or(e(R::target_infallible, span)) - .and_then(|pair| Ok(self.target_from_pair(pair)?))? - .take(); - - let (ok, err) = (Box::new(ok), Box::new(err)); - - Ok(( - Span::new(ok_span.start, err_span.end), - Target::Infallible { ok, err }, - ) - .into()) - } - - /// Parse block expressions. - fn block_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let mut expressions = vec![]; - - for pair in pair.into_inner() { - expressions.push(self.expression_from_pair(pair)?.into_inner()); - } - - Ok((span, Block::new(expressions)).into()) - } - - /// Parse if-statement expressions. 
- fn if_statement_from_pair(&mut self, pair: Pair) -> IResult { - self.compiler_state.track_changes(); - - let span = Span::from(&pair); - let mut pairs = pair.into_inner(); - - // if condition - let conditional = self - .if_condition_from_pair(pairs.next().ok_or(e(R::if_statement, span))?)? - .into_inner(); - - // If the conditional failed to parse, it has created an error - // diagnostic. We return a no-op expression. This allows us to continue - // parsing. - let conditional = match self.try_or_noop(conditional, span) { - Ok(conditional) => conditional, - Err(noop) => return noop, - }; - - let true_expression = self - .expression_from_pair(pairs.next().ok_or(e(R::if_statement, span))?)? - .into_inner(); - - // else condition - let mut false_expression = pairs - .next_back() - .map(|pair| self.expression_from_pair(pair)) - .transpose()? - .map(ParsedNode::into_inner) - .unwrap_or_else(|| Expr::from(Noop)); - - let mut pairs = pairs.rev().peekable(); - - // optional if-else conditions - while let Some(pair) = pairs.next() { - let (conditional, true_expression) = match pairs.peek().map(Pair::as_rule) { - Some(R::block) | None => { - let conditional = self.if_condition_from_pair(pair)?.into_inner(); - let conditional = match self.try_or_noop(conditional, span) { - Ok(conditional) => conditional, - Err(noop) => return noop, - }; - - let true_expression = false_expression; - false_expression = Noop.into(); - - (conditional, true_expression) - } - Some(R::if_condition) => { - let next_pair = pairs.next().ok_or(e(R::if_statement, span))?; - - let conditional = self.if_condition_from_pair(next_pair)?.into_inner(); - let conditional = match self.try_or_noop(conditional, span) { - Ok(conditional) => conditional, - Err(noop) => return noop, - }; - - let true_expression = self.expression_from_pair(pair)?.into_inner(); - - (conditional, true_expression) - } - _ => return Err(e(R::if_statement, span)), - }; - - false_expression = IfStatement::new( - conditional, - 
Box::new(true_expression), - Box::new(false_expression), - ) - .into(); - } - - let node = IfStatement::new( - conditional, - Box::new(true_expression), - Box::new(false_expression), - ); - - Ok((span, node).into()) - } - - fn if_condition_from_pair(&mut self, pair: Pair) -> IResult> { - let span = Span::from(&pair); - let mut pairs = pair.clone().into_inner(); - - let (span, expression) = if let Some(R::boolean_expr) = pairs.peek().map(|p| p.as_rule()) { - let pair = pairs.next().ok_or(e(R::if_condition, span))?; - self.expression_from_pair(pair)?.take() - } else { - self.block_from_pair(pair)?.take() - }; - - // If the condition is invalid, we add a diagnostic error. We also let - // the callee know parsing the condition failed. - let result = IfCondition::new(Box::new(expression), &self.compiler_state) - .map_err(|err| self.diagnostics.push((span, err).into())); - - Ok((span, result).into()) - } - - /// Parse not operator, or fall-through to primary values or function calls. - fn not_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let pairs = pair.into_inner(); - - let mut count = 0; - let mut expression = Expr::from(Noop); - - for pair in pairs { - match pair.as_rule() { - R::operator_not => count += 1, - R::primary => expression = self.primary_from_pair(pair)?.into_inner(), - R::call => expression = self.call_from_pair(pair)?.into_inner(), - _ => return Err(e(R::not, &pair)), - } - } - - if count % 2 != 0 { - expression = Expr::from(Not::new(Box::new(expression))) - } - - Ok((span, expression).into()) - } - - /// Parse one of possible primary expressions. 
- fn primary_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let pair = pair.into_inner().next().ok_or(e(R::primary, span))?; - - match pair.as_rule() { - R::value => self.literal_from_pair(pair.into_inner().next().ok_or(e(R::value, span))?), - R::variable => self.variable_from_pair(pair).map(ParsedNode::to_expr), - R::path => self.path_from_pair(pair).map(|node| { - let (span, path) = node.take(); - (span, Path::new(path)).into() - }), - R::group => { - self.expression_from_pair(pair.into_inner().next().ok_or(e(R::group, span))?) - } - _ => Err(e(R::primary, &pair)), - } - } - - /// Parse a [`Value`] into a [`Literal`] expression. - fn literal_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - - match pair.as_rule() { - R::string => self.string_from_pair(pair).map(ParsedNode::to_expr), - R::null => Ok((span, Literal::from(Value::Null)).into()), - R::boolean => Ok((span, Literal::from(pair.as_str() == "true")).into()), - R::integer => Ok(( - span, - Literal::from( - pair.as_str() - .parse::() - .map_err(|_| e(R::integer, &pair))?, - ), - ) - .into()), - R::float => Ok(( - span, - Literal::from( - pair.as_str() - .parse::() - .map_err(|_| e(R::float, &pair))?, - ), - ) - .into()), - R::array => self.array_from_pair(pair).map(ParsedNode::to_expr), - R::map => self.map_from_pair(pair).map(ParsedNode::to_expr), - R::regex => self.regex_from_pair(pair).map(ParsedNode::to_expr), - _ => Err(e(R::value, &pair)), - } - } - - fn array_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - - let expressions = pair - .into_inner() - .map(|pair| self.expression_from_pair(pair).map(ParsedNode::into_inner)) - .collect::, _>>()?; - - Ok((span, Array::new(expressions)).into()) - } - - fn map_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - - let map = pair - .into_inner() - .map(|pair| self.kv_from_pair(pair).map(ParsedNode::into_inner)) - .collect::, _>>()?; - - 
Ok((span, Map::new(map)).into()) - } - - fn kv_from_pair(&mut self, pair: Pair) -> IResult<(String, Expr)> { - let span = Span::from(&pair); - let mut inner = pair.into_inner(); - - let pair = inner.next().ok_or(e(R::kv_pair, span))?; - let (key_span, key) = self.string_from_pair(pair)?.take(); - - let pair = inner.next().ok_or(e(R::kv_pair, span))?; - let (expr_span, expr) = self.expression_from_pair(pair)?.take(); - - Ok((key_span.start..expr_span.end, (key, expr)).into()) - } - - /// Parse function call expressions. - fn call_from_pair(&mut self, pair: Pair) -> IResult { - self.compiler_state.track_changes(); - - let span = Span::from(&pair); - let mut inner = pair.into_inner(); - - let ident = inner.next().ok_or(e(R::call, span))?.as_str(); - let ident_span = span.start..=ident.bytes().len(); - - let abort_on_error = match inner.peek().map(|p| p.as_rule()) { - Some(R::bang) => { - inner.next(); - true - } - _ => false, - }; - - let (arguments_span, arguments) = inner - .next() - .map(|pair| self.arguments_from_pair(pair)) - .transpose()? 
- .map(|s| s.take()) - .unwrap_or_else(|| (Span::default(), vec![])); - - let function = Function::new( - ident, - abort_on_error, - arguments, - &self.function_definitions, - &self.compiler_state, - ); - - let expression: Expr = match function { - Ok(function) => function.into(), - Err(err) => { - self.compiler_state.revert_changes(); - - self.diagnostics.push(match err { - function::Error::Undefined => Diagnostic::error("call to undefined function") - .with_primary("undefined function", ident_span), - function::Error::ArityMismatch { max, got } => { - Diagnostic::error("function argument arity mismatch") - .with_primary(format!("got: {}", got), arguments_span) - .with_context(format!("expected: {} (at most)", max), arguments_span) - } - // TODO: have spans for each individual keyword - function::Error::UnknownKeyword(kw) => { - Diagnostic::error("unknown function argument keyword") - .with_primary(format!("unknown keyword: {}", kw), arguments_span) - } - function::Error::AbortInfallible => { - let bang_span = *ident_span.end() + 1..*ident_span.end() + 1; - - Diagnostic::error("cannot abort function that never fails") - .with_primary("this function cannot fail", ident_span) - .with_context("remove this abort-instruction", bang_span) - .with_note(Note::SeeErrDocs) - } - function::Error::MissingArg { argument, .. } => { - Diagnostic::error("function argument missing") - .with_primary(format!("required argument missing: {}", argument), span) - } - function::Error::Compile(message) => { - Diagnostic::error("unable to parse function").with_primary(message, span) - } - function::Error::Argument(arg, err) => { - Diagnostic::error("function argument error") - .with_primary(format!("{}: {}", arg, err), arguments_span) - } - }); - - Noop.into() - } - }; - - Ok((span, expression).into()) - } - - /// Parse into a vector of argument properties. 
- fn arguments_from_pair(&mut self, pair: Pair) -> IResult, Expr)>> { - let span = Span::from(&pair); - - let arguments = pair - .into_inner() - .map(|pair| self.argument_from_pair(pair).map(ParsedNode::into_inner)) - .collect::, _>>()?; - - Ok((span, arguments).into()) - } - - /// Parse optional argument keyword and [`Argument`] value. - fn argument_from_pair(&mut self, pair: Pair) -> IResult<(Option, Expr)> { - let span = Span::from(&pair); - let mut ident = None; - - for pair in pair.into_inner() { - match pair.as_rule() { - // This matches first, if a keyword is provided. - R::ident => ident = Some(pair.as_str().to_owned()), - _ => { - return Ok( - (span, (ident, self.expression_from_pair(pair)?.into_inner())).into(), - ) - } - } - } - - Err(e(R::argument, span)) - } - - /// Parse a [`Regex`] value - fn regex_from_pair(&mut self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let mut inner = pair.into_inner(); - - let pattern = inner - .next() - .ok_or(e(R::regex_inner, span))? - .as_str() - .replace("\\/", "/"); - - let (x, i, m) = inner - .next() - .map(|flags| { - flags - .as_str() - .chars() - .fold((false, false, false), |(x, i, m), flag| match flag { - 'x' => (true, i, m), - 'i' => (x, true, m), - 'm' => (x, i, true), - _ => (x, i, m), - }) - }) - .unwrap_or_default(); - - #[allow(clippy::trivial_regex)] - let regex = RegexBuilder::new(&pattern) - .case_insensitive(i) - .multi_line(m) - .ignore_whitespace(x) - .build() - .map_err(|err| { - let error = err - .to_string() - .split("error: ") - .last() - .unwrap_or("unknown error") - .to_owned(); - - // Record error diagnostic for invalid regex. - self.diagnostics.push( - Diagnostic::error("regex parsing unsuccessful") - .with_primary("invalid regex", span) - .with_primary(format!("error: {}", error), span), - ) - }) - .unwrap_or_else(|_| Regex::new("").unwrap()); - - Ok((span, regex).into()) - } - - /// Parse a [`Path`] value, e.g. 
".foo.bar" - fn path_from_pair(&self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let mut pairs = pair.into_inner(); - - // If no segments are provided, it's the root path (e.g. `.`). - let path_segments = match pairs.next() { - Some(path_segments) => path_segments, - None => return Ok((span, path::Path::root()).into()), - }; - - let segments = match path_segments.as_rule() { - R::path_segments => self.path_segments_from_pair(path_segments)?, - _ => return Err(e(R::path, &path_segments)), - }; - - Ok((span, path::Path::new_unchecked(segments.into_inner())).into()) - } - - fn path_segments_from_pair(&self, pair: Pair) -> IResult> { - let span = Span::from(&pair); - - let segments: Vec = pair - .into_inner() - .map(|pair| match pair.as_rule() { - R::path_index => self.path_index_from_pair(pair).map(ParsedNode::into_inner), - R::path_segment => self - .path_segment_from_pair(pair) - .map(ParsedNode::into_inner), - _ => Err(e(R::path_segments, &pair)), - }) - .collect::>()?; - - Ok((span, segments).into()) - } - - fn path_segment_from_pair(&self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let segment = pair.into_inner().next().ok_or(e(R::path_segment, span))?; - - match segment.as_rule() { - R::path_field => self.path_field_segment_from_pair(segment), - R::path_coalesce => self.path_coalesce_segment_from_pair(segment), - _ => Err(e(R::path_segment, &segment)), - } - } - - fn path_field_segment_from_pair(&self, pair: Pair) -> IResult { - self.path_field_from_pair(pair).map(|node| { - let (span, field) = node.take(); - (span, path::Segment::Field(field)).into() - }) - } - - fn path_coalesce_segment_from_pair(&self, pair: Pair) -> IResult { - let span = Span::from(&pair); - - let fields = pair - .into_inner() - .map(|pair| self.path_field_from_pair(pair).map(ParsedNode::into_inner)) - .collect::, _>>()?; - - Ok((span, path::Segment::Coalesce(fields)).into()) - } - - fn path_field_from_pair(&self, pair: Pair) -> IResult { - let span = 
Span::from(&pair); - let field = pair.into_inner().next().ok_or(e(Rule::path_field, span))?; - - match field.as_rule() { - R::string => Ok(( - span, - path::Field::Quoted(self.string_from_pair(field)?.into_inner()), - ) - .into()), - R::field => Ok((span, path::Field::Regular(field.as_str().to_owned())).into()), - _ => Err(e(R::path_field, &field)), - } - } - - fn path_index_from_pair(&self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let index = pair - .into_inner() - .next() - .ok_or(e(R::path_index, span))? - .as_str() - .parse::() - .map_err(|_| e(R::path_index_inner, span))?; - - Ok((span, path::Segment::Index(index)).into()) - } - - /// Parse a [`Variable`] value, e.g. "$foo" - fn variable_from_pair(&self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let mut inner = pair.into_inner(); - - let ident = inner - .next() - .ok_or(e(R::variable, span))? - .as_str() - .to_owned(); - - let segments = inner.try_fold(vec![], |mut segments, pair| { - match pair.as_rule() { - R::path_index => segments.push(self.path_index_from_pair(pair)?.into_inner()), - R::path_segments => { - segments.append(&mut self.path_segments_from_pair(pair)?.into_inner()) - } - _ => return Err(e(R::variable, &pair)), - }; - - Ok(segments) - })?; - - let expr = match segments { - _ if segments.is_empty() => None, - _ => { - let path = path::Path::new_unchecked(segments); - Some(expression::Path::new(path)) - } - }; - - Ok((span, Variable::new(ident, expr)).into()) - } - - fn string_from_pair(&self, pair: Pair) -> IResult { - let span = Span::from(&pair); - let string = pair.into_inner().next().ok_or(e(R::string, span))?; - self.escaped_string_from_pair(string) - } - - fn escaped_string_from_pair(&self, pair: Pair) -> IResult { - let span = Span::from(&pair); - - // This is only executed once per string at parse time, and so I'm not - // losing sleep over the reallocation. 
However, if we want to mutate the - // underlying string then we can take some inspiration from: - // - // https://github.com/rust-lang/rust/blob/master/src/librustc_lexer/src/unescape.rs - - let literal_str = pair.as_str(); - let mut escaped_chars: Vec = Vec::with_capacity(literal_str.len()); - - let mut is_escaped = false; - for c in literal_str.chars() { - if is_escaped { - match c { - '\\' => escaped_chars.push(c), - 'n' => escaped_chars.push('\n'), - 't' => escaped_chars.push('\t'), - '"' => escaped_chars.push('"'), - _ => return Err(e(Rule::char, &pair)), - } - is_escaped = false; - } else if c == '\\' { - is_escaped = true; - } else { - escaped_chars.push(c); - } - } - - Ok((span, escaped_chars.into_iter().collect::()).into()) - } - - /// Allows you to check if a result passed. If it did, the compiler starts - /// tracking new changes, if it didn't, the existing state is overwritten - /// with the previously recorded changes (if any). - fn try_or_noop(&mut self, value: Result, span: Span) -> Result> { - match value { - Ok(value) => { - self.compiler_state.track_changes(); - Ok(value) - } - Err(_) => { - self.compiler_state.revert_changes(); - Err(Ok((span, Noop).into())) - } - } - } - - // The operations are defined in reverse order, meaning boolean expressions are - // computed first, and multiplication last. - // - // The order of `op` operations defines operator precedence. - operation_fns! 
{ - multiplication => { - op: [Multiply, Divide, IntegerDivide, Remainder], - next: not, - } - - addition => { - op: [Add, Subtract], - next: multiplication, - } - - comparison => { - op: [Greater, GreaterOrEqual, Less, LessOrEqual], - next: addition, - } - - equality => { - op: [Equal, NotEqual], - next: comparison, - } - - boolean_expr => { - op: [ErrorOr, And, Or], - next: equality, - } - } -} - -// ----------------------------------------------------------------------------- - -#[inline] -fn e(rule: R, span: impl Into) -> ParserBug { - ParserBug(span.into(), rule) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn rule_root_path() { - let cases = vec![ - ( - ".", - vec![], - Ok(vec![Path::new(path::Path::new_unchecked(vec![])).into()]), - ), - ( - " . ", - vec![], - Ok(vec![Path::new(path::Path::new_unchecked(vec![])).into()]), - ), - ( - ".\n", - vec![], - Ok(vec![Path::new(path::Path::new_unchecked(vec![])).into()]), - ), - ( - "\n.", - vec![], - Ok(vec![Path::new(path::Path::new_unchecked(vec![])).into()]), - ), - ( - "\n.\n", - vec![], - Ok(vec![Path::new(path::Path::new_unchecked(vec![])).into()]), - ), - // TODO: move to `remap-tests` - // ("..", vec![" 1:2\n", "= expected path segment"], Ok(vec![])), - // (". bar", vec![" 1:3\n", "= expected operator"], Ok(vec![])), - // ( - // r#". 
"bar""#, - // vec![" 1:2\n", "= expected path segment"], // TODO: improve error message - // Ok(vec![]), - // ), - ]; - - validate_rule(cases); - } - - #[allow(clippy::type_complexity)] - fn validate_rule(cases: Vec<(&str, Vec<&str>, Result, ()>)>) { - for (mut i, (source, compile_check, run_check)) in cases.into_iter().enumerate() { - let compile_check: Vec<&str> = compile_check; - i += 1; - - let mut state = state::Compiler::default(); - let parser = Parser::new(&[], &mut state, true); - let pairs = parser - .program_from_str(source) - .map_err(|err| diagnostic::Formatter::new(source, err).to_string()); - - match pairs { - Ok((got, _)) => { - if compile_check.is_empty() { - let got = got.into_iter().map(|e| e.expr).collect(); - - assert_eq!(Ok(got), run_check, "test case: {}", i) - } else { - for exp in compile_check { - assert!( - "".contains(exp), - "expected error: {}\nwith source: {}\nresult: {:?}\n test case {}", - exp, - source, - got, - i - ); - } - } - } - Err(err) if !compile_check.is_empty() => { - for exp in compile_check { - assert!( - err.contains(exp), - "expected: {}\nwith source: {}\nfull error message: {}\n test case {}", - exp, - source, - err, - i - ); - } - } - Err(err) => panic!("expected no error, got \"{}\" for test case {}", err, i), - } - } - } - - // TODO: move to `remap-tests` - // #[test] - // fn check_parser_errors() { - // let cases = vec![ - // ( - // ".foo bar", - // vec![ - // " 1:6\n", - // "= expected operator", - // ], - // ), - // ( - // ".=", - // vec![ - // " 1:3\n", - // "= expected assignment, if-statement, query, or block", - // ], - // ), - // ( - // ".foo = !", - // vec![ - // " 1:9\n", - // "= expected value, variable, path, group or function call, value, variable, path, group, !", - // ], - // ), - // ( - // r#".foo.bar = "baz" and this"#, - // vec![ - // " 1:18\n", - // "= expected operator", - // ], - // ), - // (r#".foo.bar = "baz" +"#, vec![" 1:19", "= expected query"]), - // ( - // ".foo.bar = .foo.(bar |)", - // 
vec![" 1:23\n", "= expected path field"], - // ), - // ( - // r#"if .foo > 0 { .foo = "bar" } else"#, - // vec![" 1:34\n", "= expected block"], - // ), - // ( - // "if .foo { }", - // vec![ - // " 1:11\n", - // "= expected assignment, if-statement, query, or block", - // ], - // ), - // ( - // "if { del(.foo) } else { del(.bar) }", - // vec![" 1:6\n", "= expected string"], - // ), - // ( - // "if .foo > .bar { del(.foo) } else { .bar = .baz", - // // This message isn't great, ideally I'd like "expected closing bracket" - // vec![ - // " 1:48\n", - // "= expected operator or path index", - // ], - // ), - // ("only_fields(.foo,)", vec![" 1:18\n", "= expected argument or path"]), - // ("only_fields(,)", vec![" 1:13\n", "= expected argument"]), - // ( - // // Due to the explicit list of allowed escape chars our grammar - // // doesn't actually recognize this as a string literal. - // r#".foo = "invalid escape \k sequence""#, - // vec![ - // " 1:8\n", - // "= expected assignment, if-statement, query, or block", - // ], - // ), - // ( - // // Same here as above. - // r#".foo."invalid \k escape".sequence = "foo""#, - // vec![" 1:6\n", "= expected path segment"], - // ), - // ( - // // Regexes can't be parsed as part of a path - // r#".foo = split(.foo, ./[aa]/)"#, - // vec![ - // " 1:27\n", - // "= expected query", - // ], - // ), - // ( - // // we cannot assign a regular expression to a field. - // r#".foo = /ab/i"#, - // vec!["remap error: parser error: cannot assign regex to object"], - // ), - // ( - // // we cannot assign an array containing a regular expression to a field. - // r#".foo = ["ab", /ab/i]"#, - // vec!["remap error: parser error: cannot assign regex to object"], - // ), - // ( - // // We cannot assign to a regular expression. 
- // r#"/ab/ = .foo"#, - // vec![ - // " 1:6\n", - // "= expected operator", - // ], - // ), - // ( - // r#"/ab/"#, - // vec!["remap error: parser error: cannot return regex from program"], - // ), - // ( - // r#"foo = /ab/"#, - // vec!["remap error: parser error: cannot return regex from program"], - // ), - // ( - // r#"[/ab/]"#, - // vec!["remap error: parser error: cannot return regex from program"], - // ), - // ( - // r#" - // foo = /ab/ - // [foo] - // "#, - // vec!["remap error: parser error: cannot return regex from program"], - // ), - // ( - // r#" - // foo = [/ab/] - // foo - // "#, - // vec!["remap error: parser error: cannot return regex from program"], - // ), - // ("foo bar", vec![" 1:5\n", "= expected operator"]), - // ("[true] [false]", vec![" 1:8\n", "= expected operator"]), - - // // reserved keywords - // ("if = true", vec![" 1:4\n", "= expected query"]), - // ("else = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("for = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("while = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("loop = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("abort = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("break = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("continue = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("return = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("as = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("type = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("let = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("until = true", vec![" 1:1\n", "= expected assignment, 
if-statement, query, or block"]), - // ("then = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("impl = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("in = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("self = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("this = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("use = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ("std = true", vec![" 1:1\n", "= expected assignment, if-statement, query, or block"]), - // ]; - - // for (source, exp_expressions) in cases { - // let mut state = state::Compiler::default(); - // let parser = Parser::new(&[], &mut state, false); - // let err = parser - // .program_from_str(source) - // .err() - // .map(|err| diagnostic::Formatter::new(source, err).to_string()) - // .unwrap(); - - // for exp in exp_expressions { - // assert!( - // err.contains(exp), - // "expected: {}\nwith source: {}\nfull error message: {}", - // exp, - // source, - // err - // ); - // } - // } - // } -}