Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ tree-sitter = "0.25.3"
tree-sitter-cpp = "0.23.4"
tree-sitter-rust-orchard = "0.12.0"
tree-sitter-java = "0.23.5"
tree-sitter-python = "0.25.0"
rayon = "1.11.0"
miette = { version = "7.6.0", features = ["fancy"] }

Expand Down
26 changes: 15 additions & 11 deletions docs/Tasks.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
# Tasks

1. Handle running with no log format.
1. Extract a thread id from log when available and associate with source ref.
1. Generate call stack from exceptions.
1. Support multiple source roots from CLI.
1. Serialize state for re-use on subsequent executions
- [ ] Handle running CLI with no log format.
- TSS: Doesn't this work already? I echo
the body of the log message into log2src
and it can find the message.
Comment on lines +4 to +6
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I tried it recently, but there was a panic when something was using the log format. Could we remove the log formats from the test code if they're unneeded by the test case?

- [ ] Extract a thread id from log when available and associate with source ref.
- [ ] Generate call stack from exceptions.
- [ ] Support multiple source roots from CLI.
- [ ] Serialize state for re-use on subsequent executions.

## Extension

1. Work with non .log extension (.json, etc).
1. Basic test coverage
1. Support src -> log breakpoints
- [ ] Work with non .log extension (.json, etc).
- [ ] Basic test coverage
- [ ] Support src -> log breakpoints

## Languages

1. Python
1. Go
1. JavaScript
- [X] Python
- [ ] Go
- [ ] JavaScript
- [ ] TypeScript
98 changes: 94 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
use itertools::Itertools;
use miette::Diagnostic;
use rayon::prelude::*;
use regex::RegexSet;
use regex::{Captures, Regex, RegexSet};
use serde::Serialize;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs::File;
use std::io;
use std::ops::RangeBounds;
use std::ops::{Deref, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::{Arc, LazyLock};
use thiserror::Error;
use tree_sitter::Language;

Expand Down Expand Up @@ -254,6 +254,7 @@ pub enum SourceLanguage {
Java,
#[serde(rename = "C++")]
Cpp,
Python,
}

impl From<SourceLanguage> for Language {
Expand All @@ -262,6 +263,7 @@ impl From<SourceLanguage> for Language {
SourceLanguage::Rust => tree_sitter_rust_orchard::LANGUAGE.into(),
SourceLanguage::Java => tree_sitter_java::LANGUAGE.into(),
SourceLanguage::Cpp => tree_sitter_cpp::LANGUAGE.into(),
SourceLanguage::Python => tree_sitter_python::LANGUAGE.into(),
}
}
}
Expand All @@ -270,12 +272,30 @@ const IDENTS_RS: &[&str] = &["debug", "info", "warn"];
const IDENTS_JAVA: &[&str] = &["logger", "log", "fine", "debug", "info", "warn", "trace"];
const IDENTS_CPP: &[&str] = &["debug", "info", "warn", "trace"];

// Identifier list selected for `SourceLanguage::Python`.
// NOTE(review): Python's stdlib `logging` spells this level `warning` (`warn` is a
// deprecated alias) and has no `trace` level, while `error`/`critical`/`exception`
// are absent here — confirm against how these identifiers are consumed.
const IDENTS_PYTHON: &[&str] = &["debug", "info", "warn", "trace"];

// Lazily compiled pattern for brace placeholders: `{`, then optionally either an
// identifier-like name (capture group 1: letters, digits, `_`, `.`) or a run of
// digits (capture group 2), optional whitespace, an optional `:`-prefixed spec
// running up to the closing brace, then `}`.
static RUST_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"\{(?:([a-zA-Z_][a-zA-Z0-9_.]*)|(\d+))?\s*(?::[^}]*)?}"#).unwrap()
});

// Lazily compiled pattern matching either `{`...`}` with arbitrary content, or a
// backslash-prefixed `\{`...`}` whose content is captured as group 1.
// NOTE(review): `.*` is greedy, so several placeholders on one line (e.g.
// `{} and {}`) are matched as a single span — confirm whether a non-greedy or
// `[^}]*` form was intended.
static JAVA_PLACEHOLDER_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\{.*}|\\\{(.*)}"#).unwrap());

// Lazily compiled pattern matching either a printf-style conversion (`%`, flags,
// width, precision, length modifiers, conversion character) or the same brace
// placeholder form as the Rust pattern (name in group 1, digits in group 2).
static CPP_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]|\{(?:([a-zA-Z_][a-zA-Z0-9_.]*)|(\d+))?\s*(?::[^}]*)?}"#).unwrap()
});

// Lazily compiled pattern matching printf-style conversions only; it defines no
// capture groups.
// NOTE(review): brace / f-string placeholders are not covered by this pattern —
// presumably they are handled elsewhere; confirm.
static PYTHON_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]"#).unwrap()
});

impl SourceLanguage {
/// Returns this language's name as a static string slice.
pub fn as_str(&self) -> &'static str {
    match *self {
        SourceLanguage::Python => "Python",
        SourceLanguage::Cpp => "C++",
        SourceLanguage::Java => "Java",
        SourceLanguage::Rust => "Rust",
    }
}

Expand All @@ -284,6 +304,7 @@ impl SourceLanguage {
Some("rs") => Some(Self::Rust),
Some("java") => Some(Self::Java),
Some("h" | "hh" | "hpp" | "hxx" | "tpp" | "cc" | "cpp" | "cxx") => Some(Self::Cpp),
Some("py") => Some(Self::Python),
None | Some(_) => None,
}
}
Expand Down Expand Up @@ -339,6 +360,20 @@ impl SourceLanguage {
)
"#
}
SourceLanguage::Python => {
r#"
(
(expression_statement
(call
function: (_) @func
arguments: (argument_list .
(string) @args
)
)
)
)
"#
}
}
}

Expand All @@ -347,7 +382,34 @@ impl SourceLanguage {
SourceLanguage::Rust => IDENTS_RS,
SourceLanguage::Java => IDENTS_JAVA,
SourceLanguage::Cpp => IDENTS_CPP,
SourceLanguage::Python => IDENTS_PYTHON,
}
}

fn get_placeholder_regex(&self) -> &'static Regex {
match self {
SourceLanguage::Rust => RUST_PLACEHOLDER_REGEX.deref(),
SourceLanguage::Java => JAVA_PLACEHOLDER_REGEX.deref(),
SourceLanguage::Cpp => CPP_PLACEHOLDER_REGEX.deref(),
SourceLanguage::Python => PYTHON_PLACEHOLDER_REGEX.deref(),
}
}

/// Converts one placeholder match into a [`FormatArgument`].
///
/// The first explicit capture group that participated in the match decides the
/// result:
/// * group 1 (Rust, Java, C++) yields `FormatArgument::Named` with the captured text;
/// * group 2 (Rust, C++) yields `FormatArgument::Positional` with the parsed index;
/// * no participating group yields `FormatArgument::Placeholder`.
///
/// A positional index that does not fit the target integer type cannot refer to
/// a real argument, so it is reported as `FormatArgument::Placeholder` instead
/// of panicking while scanning source text.
///
/// # Panics
///
/// Panics if a capture group participates that the language's placeholder regex
/// is not expected to define; that indicates the regex and this function have
/// drifted apart.
fn captures_to_format_arg(&self, caps: &Captures) -> FormatArgument {
    // Group 0 is the whole match; skipping it makes `index` zero-based over the
    // explicit capture groups.
    let first_hit = caps
        .iter()
        .skip(1)
        .enumerate()
        .find_map(|(index, cap)| cap.map(|m| (index, m)));

    let Some((index, cap)) = first_hit else {
        return FormatArgument::Placeholder;
    };

    match (self, index) {
        (SourceLanguage::Rust | SourceLanguage::Java | SourceLanguage::Cpp, 0) => {
            FormatArgument::Named(cap.as_str().to_string())
        }
        // The group only matches `\d+`, so parsing can fail solely on overflow.
        (SourceLanguage::Rust | SourceLanguage::Cpp, 1) => cap
            .as_str()
            .parse()
            .map_or(FormatArgument::Placeholder, FormatArgument::Positional),
        _ => unreachable!(
            "unexpected capture group {} for {} placeholder",
            index + 1,
            self.as_str()
        ),
    }
}
}

Expand Down Expand Up @@ -513,7 +575,7 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec
for result in results {
// println!("node.kind()={:?} range={:?}", result.kind, result.range);
match result.kind.as_str() {
"string_literal" => {
"string_literal" | "string" => {
if let Some(src_ref) = SourceRef::new(code, result) {
patterns.push(src_ref.pattern.clone());
matched.push(src_ref);
Expand Down Expand Up @@ -852,4 +914,32 @@ fn main() {
},]
);
}

// Python fixture with four logging calls inside `main`: a %-style format string
// containing a `\N{...}` named-character escape, a single-quoted f-string, an
// f-string containing a `\n` escape, and a raw triple-quoted f-string spanning
// two lines.
const PYTHON_SOURCE: &str = r#"
def main(args):
logger.info("foo %s \N{greek small letter pi}", test_var)
logging.info(f'Hello, {args[1]}!')
logger.warning(f"warning message:\nlow disk space")
logger.info(rf"""info message:
processing started -- {args[0]}""")
"#;

// Extracts log statements from the in-memory Python fixture, snapshots them, and
// checks that the `%s` placeholder of the first statement resolves to `test_var`.
#[test]
fn test_basic_python() {
// Log line expected to match the first statement in the fixture.
let log_ref = LogRef::new("foo bar π");
// NOTE(review): presumably the `.py` extension is what selects the Python
// language for this in-memory source — confirm in `CodeSource::from_string`.
let code = CodeSource::from_string(&Path::new("in-mem.py"), PYTHON_SOURCE);
// Only one source is passed in, so take the single result's statements.
let src_refs = extract_logging(&[code], &ProgressTracker::new())
.pop()
.unwrap()
.log_statements;
// Compared against the stored YAML snapshot for this test.
assert_yaml_snapshot!(src_refs);
// Match the log line against the first extracted statement only.
let vars = extract_variables(&log_ref, &src_refs[0]);
assert_eq!(
vars,
vec![VariablePair {
expr: "test_var".to_string(),
value: "bar".to_string()
},]
);
}
}
52 changes: 52 additions & 0 deletions src/snapshots/log2src__tests__basic_python.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about adding test coverage here vs. in the root test directory? I guess this tests the library pretty thoroughly end to end, so maybe those tests should focus more on the CLI features. Does that make sense to you?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I add tests in lib.rs so that I can use the debugger. The tests under the root test directory spawn a separate process, and I don't think I was able to attach a debugger to it.

source: src/lib.rs
expression: src_refs
---
- sourcePath: in-mem.py
language: Python
lineNumber: 3
endLineNumber: 3
column: 16
name: main
text: "\"foo %s \\N{greek small letter pi}\""
quality: 5
pattern: "(?s)^foo (.+) \\w$"
args:
- Placeholder
vars:
- test_var
- sourcePath: in-mem.py
language: Python
lineNumber: 4
endLineNumber: 4
column: 17
name: main
text: "f'Hello, {args[1]}!'"
quality: 7
pattern: "(?s)^Hello, (.+)!$"
args:
- Named: "args[1]"
vars: []
- sourcePath: in-mem.py
language: Python
lineNumber: 5
endLineNumber: 5
column: 19
name: main
text: "f\"warning message:\\nlow disk space\""
quality: 29
pattern: "(?s)^warning message:\\nlow disk space$"
args: []
vars: []
- sourcePath: in-mem.py
language: Python
lineNumber: 6
endLineNumber: 7
column: 16
name: main
text: "rf\"\"\"info message:\nprocessing started -- {args[0]}\"\"\""
quality: 33
pattern: "(?s)^info message:\\nprocessing started -- (.+)$"
args:
- Named: "args[0]"
vars: []
Loading
Loading