From aaacb30cbe00dcb027e1ec2fc61cd34eedd83fc2 Mon Sep 17 00:00:00 2001 From: Jiping Yin Date: Wed, 24 Dec 2025 17:57:32 +0800 Subject: [PATCH 1/2] feat: eBPF Collect Java crash log file hs_err_pid.log --- agent/src/ebpf/kernel/files_rw.bpf.c | 45 ++++- agent/src/ebpf/kernel/include/common.h | 1 + agent/src/ebpf/kernel/socket_trace.bpf.c | 3 + .../samples/rust/socket-tracer/src/main.rs | 191 +++--------------- agent/src/ebpf/user/ctrl_tracer.c | 107 +++++++++- agent/src/ebpf/user/socket.c | 70 +++++++ 6 files changed, 243 insertions(+), 174 deletions(-) diff --git a/agent/src/ebpf/kernel/files_rw.bpf.c b/agent/src/ebpf/kernel/files_rw.bpf.c index 656dffa4770..508a38e6d5f 100644 --- a/agent/src/ebpf/kernel/files_rw.bpf.c +++ b/agent/src/ebpf/kernel/files_rw.bpf.c @@ -346,16 +346,30 @@ static __inline int trace_io_event_common(void *ctx, v->coroutine_id = trace_key.goid; v->timestamp = data_args->enter_ts; v->syscall_len = sizeof(*buffer); - v->source = DATA_SOURCE_IO_EVENT; + // hs_err_pid + if (buffer->filename[0] == 'h' && buffer->filename[1] == 's' && + buffer->filename[2] == '_' && buffer->filename[3] == 'e' && + buffer->filename[4] == 'r' && buffer->filename[5] == 'r' && + buffer->filename[6] == '_' && buffer->filename[7] == 'p') { + v->source = DATA_SOURCE_FILE_WRITE; + v->syscall_len = data_args->bytes_count; + } else { + return 0; + v->source = DATA_SOURCE_IO_EVENT; + v->syscall_len = sizeof(*buffer); + } v->thread_trace_id = trace_id; v->msg_type = MSG_COMMON; bpf_get_current_comm(v->comm, sizeof(v->comm)); + bool is_vecs = false; + if (data_args->iov != NULL) + is_vecs = true; #if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) struct tail_calls_context *context = (struct tail_calls_context *)v->data; context->max_size_limit = data_max_sz; context->push_reassembly_bytes = 0; - context->vecs = false; + context->vecs = is_vecs; context->is_close = false; context->dir = direction; #ifdef SUPPORTS_KPROBE_ONLY @@ -366,7 +380,7 @@ static __inline int trace_io_event_common(void *ctx, return 0; #else return __output_data_common(ctx, tracer_ctx, v_buff, data_args, - direction, false, data_max_sz, false, 0); + direction, is_vecs, data_max_sz, false, 0); #endif } @@ -479,7 +493,9 @@ TP_SYSCALL_PROG(exit_preadv2) (struct syscall_comm_exit_ctx * ctx) { #endif /* SUPPORTS_KPROBE_ONLY */ // File Write Event Tracing -static __inline int do_sys_enter_pwrite(int fd, enum syscall_src_func fn) +static __inline int do_sys_enter_pwrite(int fd, const char *buf, + struct iovec *iov, int iovlen, + enum syscall_src_func fn) { __u32 k0 = 0; struct member_fields_offset *offset = members_offset__lookup(&k0); @@ -490,6 +506,9 @@ static __inline int do_sys_enter_pwrite(int fd, enum syscall_src_func fn) struct data_args_t write_args = {}; write_args.source_fn = fn; write_args.fd = fd; + write_args.buf = buf; + write_args.iov = iov; + write_args.iovlen = iovlen; write_args.enter_ts = bpf_ktime_get_ns(); active_write_args_map__update(&id, &write_args); return 0; @@ -500,7 +519,8 @@ static __inline int do_sys_enter_pwrite(int fd, enum syscall_src_func fn) #ifdef SUPPORTS_KPROBE_ONLY KPROG(ksys_pwrite64) (struct pt_regs * ctx) { int fd = (int)PT_REGS_PARM1(ctx); - return do_sys_enter_pwrite(fd, SYSCALL_FUNC_PWRITE64); + const char *buf = (char *)PT_REGS_PARM2(ctx); + return do_sys_enter_pwrite(fd, buf, NULL, 0, SYSCALL_FUNC_PWRITE64); } /* @@ -510,25 +530,32 @@ KPROG(ksys_pwrite64) (struct pt_regs * ctx) { */ KPROG(do_pwritev) (struct pt_regs * ctx) { int fd = (int)PT_REGS_PARM1(ctx); - return do_sys_enter_pwrite(fd, SYSCALL_FUNC_PWRITEV); + struct iovec *iov = (struct iovec *)PT_REGS_PARM2(ctx); + int iovlen = (int)PT_REGS_PARM3(ctx); + return do_sys_enter_pwrite(fd, NULL, iov, iovlen, SYSCALL_FUNC_PWRITEV); } #else // /sys/kernel/debug/tracing/events/syscalls/sys_enter_pwrite64/format TP_SYSCALL_PROG(enter_pwrite64) (struct syscall_comm_enter_ctx * ctx) { int fd = ctx->fd; - return do_sys_enter_pwrite(fd, SYSCALL_FUNC_PWRITE64); + const char *buf = (const char *)ctx->buf; + return do_sys_enter_pwrite(fd, buf, NULL, 0, SYSCALL_FUNC_PWRITE64); } // /sys/kernel/debug/tracing/events/syscalls/sys_enter_pwritev/format TP_SYSCALL_PROG(enter_pwritev) (struct syscall_comm_enter_ctx * ctx) { int fd = ctx->fd; - return do_sys_enter_pwrite(fd, SYSCALL_FUNC_PWRITEV); + struct iovec *iov = (struct iovec *)ctx->buf; + int iovlen = (int)ctx->count; + return do_sys_enter_pwrite(fd, NULL, iov, iovlen, SYSCALL_FUNC_PWRITEV); } // /sys/kernel/debug/tracing/events/syscalls/sys_enter_pwritev2/format TP_SYSCALL_PROG(enter_pwritev2) (struct syscall_comm_enter_ctx * ctx) { int fd = ctx->fd; - return do_sys_enter_pwrite(fd, SYSCALL_FUNC_PWRITEV2); + struct iovec *iov = (struct iovec *)ctx->buf; + int iovlen = (int)ctx->count; + return do_sys_enter_pwrite(fd, NULL, iov, iovlen, SYSCALL_FUNC_PWRITEV2); } #endif /* SUPPORTS_KPROBE_ONLY */ diff --git a/agent/src/ebpf/kernel/include/common.h b/agent/src/ebpf/kernel/include/common.h index 67b23fe8e3d..b75b4053b5c 100644 --- a/agent/src/ebpf/kernel/include/common.h +++ b/agent/src/ebpf/kernel/include/common.h @@ -112,6 +112,7 @@ enum process_data_extra_source { DATA_SOURCE_RESERVED, DATA_SOURCE_DPDK, DATA_SOURCE_UNIX_SOCKET, + DATA_SOURCE_FILE_WRITE }; struct protocol_message_t { diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index fea5890ca95..f2ff20ccd12 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -3282,12 +3282,14 @@ static __inline int __output_data_common(void *ctx, * 1. The delay of the periodic push event exceeds the threshold (typically 50 milliseconds). * 2. The number of events exceeds the maximum batch size (MAX_EVENTS_BURST, typically 32). * 3. The data buffer is full (not enough space for another struct __socket_data). + * 4. Send the file content immediately when collecting it to prevent out-of-order issues caused by buffering. */ __u64 curr_time = bpf_ktime_get_ns(); __u64 diff = curr_time - tracer_ctx->last_period_timestamp; if (diff > PERIODIC_PUSH_DELAY_THRESHOLD_NS || v_buff->events_num >= MAX_EVENTS_BURST || (args && args->extra_iovlen) || + v->source == DATA_SOURCE_FILE_WRITE || ((sizeof(v_buff->data) - v_buff->len) < sizeof(*v))) { finalize_data_output(ctx, tracer_ctx, curr_time, diff, v_buff); } @@ -3391,6 +3393,7 @@ static __inline int output_data_common(void *ctx) if (diff > PERIODIC_PUSH_DELAY_THRESHOLD_NS || v_buff->events_num >= MAX_EVENTS_BURST || (args && args->extra_iovlen) || + v->source == DATA_SOURCE_FILE_WRITE || ((sizeof(v_buff->data) - v_buff->len) < sizeof(*v))) { finalize_data_output(ctx, tracer_ctx, curr_time, diff, v_buff); } diff --git a/agent/src/ebpf/samples/rust/socket-tracer/src/main.rs b/agent/src/ebpf/samples/rust/socket-tracer/src/main.rs index a28af3740a5..ec2a5532a0b 100644 --- a/agent/src/ebpf/samples/rust/socket-tracer/src/main.rs +++ b/agent/src/ebpf/samples/rust/socket-tracer/src/main.rs @@ -202,137 +202,6 @@ extern "C" fn debug_callback(_data: *mut c_char, len: c_int) { } extern "C" fn socket_trace_callback(_: *mut c_void, queue_id: c_int, sd: *mut SK_BPF_DATA) -> c_int { - unsafe { - let mut proto_tag = String::from(""); - if sk_proto_safe(sd) == SOCK_DATA_OTHER { - proto_tag.push_str("ORTHER"); - } else if sk_proto_safe(sd) == SOCK_DATA_HTTP1 { - proto_tag.push_str("HTTP1"); - } else if sk_proto_safe(sd) == SOCK_DATA_HTTP2 { - proto_tag.push_str("HTTP2"); - } else if sk_proto_safe(sd) == SOCK_DATA_DNS { - proto_tag.push_str("DNS"); - } else if sk_proto_safe(sd) == SOCK_DATA_MYSQL { - proto_tag.push_str("MYSQL"); - } else if sk_proto_safe(sd) == SOCK_DATA_POSTGRESQL { - proto_tag.push_str("POSTGRESQL"); - } else if sk_proto_safe(sd) == SOCK_DATA_REDIS { - proto_tag.push_str("REDIS"); - } else if sk_proto_safe(sd) == SOCK_DATA_KAFKA { - proto_tag.push_str("KAFKA"); - } else if sk_proto_safe(sd) == SOCK_DATA_MQTT { - proto_tag.push_str("MQTT"); - } else if sk_proto_safe(sd) == SOCK_DATA_AMQP { - proto_tag.push_str("AMQP"); - } else if sk_proto_safe(sd) == SOCK_DATA_NATS { - proto_tag.push_str("NATS"); - } else if sk_proto_safe(sd) == SOCK_DATA_PULSAR { - proto_tag.push_str("PULSAR"); - } else if sk_proto_safe(sd) == SOCK_DATA_DUBBO { - proto_tag.push_str("DUBBO"); - } else if sk_proto_safe(sd) == SOCK_DATA_SOFARPC { - proto_tag.push_str("SOFARPC"); - } else if sk_proto_safe(sd) == SOCK_DATA_FASTCGI { - proto_tag.push_str("FASTCGI"); - } else if sk_proto_safe(sd) == SOCK_DATA_BRPC { - proto_tag.push_str("BRPC"); - } else if sk_proto_safe(sd) == SOCK_DATA_TARS { - proto_tag.push_str("TARS"); - } else if sk_proto_safe(sd) == SOCK_DATA_SOME_IP { - proto_tag.push_str("SomeIP"); - } else if sk_proto_safe(sd) == SOCK_DATA_ISO8583 { - proto_tag.push_str("ISO8583"); - } else if sk_proto_safe(sd) == SOCK_DATA_MONGO { - proto_tag.push_str("MONGO"); - } else if sk_proto_safe(sd) == SOCK_DATA_TLS { - proto_tag.push_str("TLS"); - } else if sk_proto_safe(sd) == SOCK_DATA_ORACLE { - proto_tag.push_str("ORACLE"); - } else if sk_proto_safe(sd) == SOCK_DATA_OPENWIRE { - proto_tag.push_str("OPENWIRE"); - } else if sk_proto_safe(sd) == SOCK_DATA_ZMTP { - proto_tag.push_str("ZMTP"); - } else if sk_proto_safe(sd) == SOCK_DATA_WEBSPHEREMQ { - proto_tag.push_str("WEBSPHEREMQ"); - } else { - proto_tag.push_str("UNSPEC"); - } - - println!("+ --------------------------------- +"); - if sk_proto_safe(sd) == SOCK_DATA_HTTP1 { - let data = sk_data_str_safe(sd); - println!("{} <{}> BATCHLAST {} DIR {} TYPE {} PID {} THREAD_ID {} COROUTINE_ID {} CONTAINER_ID {} SOURCE {} ROLE {} COMM {} {} LEN {} SYSCALL_LEN {} SOCKET_ID 0x{:x} TRACE_ID 0x{:x} TCP_SEQ {} DATA_SEQ {} TLS {} TimeStamp {}\n{}", - date_time((*sd).timestamp), - proto_tag, - (*sd).batch_last_data, - (*sd).direction, - (*sd).msg_type, - (*sd).process_id, - (*sd).thread_id, - (*sd).coroutine_id, - sd_container_id_safe(sd), - (*sd).source, - (*sd).socket_role, - process_name_safe(sd), - flow_info(sd), - (*sd).cap_len, - (*sd).syscall_len, - (*sd).socket_id, - (*sd).syscall_trace_id_call, - (*sd).tcp_seq, - (*sd).cap_seq, - (*sd).is_tls, - (*sd).timestamp, - data); - } else { - let data: Vec = sk_data_bytes_safe(sd); - println!("{} <{}> BATCHLAST {} DIR {} TYPE {} PID {} THREAD_ID {} COROUTINE_ID {} CONTAINER_ID {} SOURCE {} ROLE {} COMM {} {} LEN {} SYSCALL_LEN {} SOCKET_ID 0x{:x} TRACE_ID 0x{:x} TCP_SEQ {} DATA_SEQ {} TLS {} TimeStamp {}", - date_time((*sd).timestamp), - proto_tag, - (*sd).batch_last_data, - (*sd).direction, - (*sd).msg_type, - (*sd).process_id, - (*sd).thread_id, - (*sd).coroutine_id, - sd_container_id_safe(sd), - (*sd).source, - (*sd).socket_role, - process_name_safe(sd), - flow_info(sd), - (*sd).cap_len, - (*sd).syscall_len, - (*sd).socket_id, - (*sd).syscall_trace_id_call, - (*sd).tcp_seq, - (*sd).cap_seq, - (*sd).is_tls, - (*sd).timestamp); - if (*sd).source == 2 { - print_uprobe_http2_info((*sd).cap_data, (*sd).cap_len); - } else if (*sd).source == 4 { - print_io_event_info((*sd).cap_data, (*sd).cap_len); - } else if (*sd).source == 5 { - print_uprobe_grpc_dataframe((*sd).cap_data, (*sd).cap_len); - } else if sk_proto_safe(sd) == SOCK_DATA_OTHER { - for x in data.into_iter() { - print!("{} ", format!("{:02x}", x)); - } - } else { - for x in data.into_iter() { - if x < 32 || x > 126 { - print!("."); - continue; - } - let b = x as char; - print!("{0}", b); - } - } - print!("\x1b[0m\n"); - } - - println!("+ --------------------------------- +\n"); - } 0 } @@ -416,29 +285,29 @@ fn main() { } } unsafe { - enable_ebpf_protocol(SOCK_DATA_HTTP1 as c_int); - enable_ebpf_protocol(SOCK_DATA_HTTP2 as c_int); - enable_ebpf_protocol(SOCK_DATA_DUBBO as c_int); - enable_ebpf_protocol(SOCK_DATA_SOFARPC as c_int); - enable_ebpf_protocol(SOCK_DATA_FASTCGI as c_int); - enable_ebpf_protocol(SOCK_DATA_BRPC as c_int); - enable_ebpf_protocol(SOCK_DATA_TARS as c_int); - enable_ebpf_protocol(SOCK_DATA_SOME_IP as c_int); - enable_ebpf_protocol(SOCK_DATA_ISO8583 as c_int); - enable_ebpf_protocol(SOCK_DATA_MYSQL as c_int); - enable_ebpf_protocol(SOCK_DATA_POSTGRESQL as c_int); - enable_ebpf_protocol(SOCK_DATA_REDIS as c_int); - enable_ebpf_protocol(SOCK_DATA_KAFKA as c_int); - enable_ebpf_protocol(SOCK_DATA_MQTT as c_int); - enable_ebpf_protocol(SOCK_DATA_AMQP as c_int); - enable_ebpf_protocol(SOCK_DATA_OPENWIRE as c_int); - enable_ebpf_protocol(SOCK_DATA_ZMTP as c_int); - enable_ebpf_protocol(SOCK_DATA_WEBSPHEREMQ as c_int); - enable_ebpf_protocol(SOCK_DATA_NATS as c_int); - enable_ebpf_protocol(SOCK_DATA_PULSAR as c_int); - enable_ebpf_protocol(SOCK_DATA_DNS as c_int); - enable_ebpf_protocol(SOCK_DATA_MONGO as c_int); - enable_ebpf_protocol(SOCK_DATA_TLS as c_int); + // enable_ebpf_protocol(SOCK_DATA_HTTP1 as c_int); + // enable_ebpf_protocol(SOCK_DATA_HTTP2 as c_int); + // enable_ebpf_protocol(SOCK_DATA_DUBBO as c_int); + // enable_ebpf_protocol(SOCK_DATA_SOFARPC as c_int); + // enable_ebpf_protocol(SOCK_DATA_FASTCGI as c_int); + // enable_ebpf_protocol(SOCK_DATA_BRPC as c_int); + // enable_ebpf_protocol(SOCK_DATA_TARS as c_int); + // enable_ebpf_protocol(SOCK_DATA_SOME_IP as c_int); + // enable_ebpf_protocol(SOCK_DATA_ISO8583 as c_int); + // enable_ebpf_protocol(SOCK_DATA_MYSQL as c_int); + // enable_ebpf_protocol(SOCK_DATA_POSTGRESQL as c_int); + // enable_ebpf_protocol(SOCK_DATA_REDIS as c_int); + // enable_ebpf_protocol(SOCK_DATA_KAFKA as c_int); + // enable_ebpf_protocol(SOCK_DATA_MQTT as c_int); + // enable_ebpf_protocol(SOCK_DATA_AMQP as c_int); + // enable_ebpf_protocol(SOCK_DATA_OPENWIRE as c_int); + // enable_ebpf_protocol(SOCK_DATA_ZMTP as c_int); + // enable_ebpf_protocol(SOCK_DATA_WEBSPHEREMQ as c_int); + // enable_ebpf_protocol(SOCK_DATA_NATS as c_int); + // enable_ebpf_protocol(SOCK_DATA_PULSAR as c_int); + // enable_ebpf_protocol(SOCK_DATA_DNS as c_int); + // enable_ebpf_protocol(SOCK_DATA_MONGO as c_int); + // enable_ebpf_protocol(SOCK_DATA_TLS as c_int); //set_feature_regex( // FEATURE_UPROBE_OPENSSL, @@ -449,9 +318,9 @@ fn main() { // CString::new(".*".as_bytes()).unwrap().as_c_str().as_ptr(), //); - //set_io_event_collect_mode(1); + set_io_event_collect_mode(2); - //set_io_event_minimal_duration(1000000); + set_io_event_minimal_duration(10); //// enable go auto traceing, //set_go_tracing_timeout(120); @@ -682,11 +551,11 @@ fn main() { // test data limit max set_data_limit_max(10000); - //let empty_string = CString::new("").expect("CString::new failed"); - //if datadump_set_config(0, empty_string.as_ptr(), 0, 60, debug_callback) != 0 { - // println!("datadump_set_config() error"); - // ::std::process::exit(1); - //} + let empty_string = CString::new("").expect("CString::new failed"); + if datadump_set_config(0, empty_string.as_ptr(), 0, 600000, debug_callback) != 0 { + println!("datadump_set_config() error"); + ::std::process::exit(1); + } print!("socket_tracer_start() finish\n"); diff --git a/agent/src/ebpf/user/ctrl_tracer.c b/agent/src/ebpf/user/ctrl_tracer.c index 4b80840d98d..6ea442a2e76 100644 --- a/agent/src/ebpf/user/ctrl_tracer.c +++ b/agent/src/ebpf/user/ctrl_tracer.c @@ -18,6 +18,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include + #include "tracer.h" #include "socket.h" @@ -101,6 +108,14 @@ static void match_pids_help(void) fprintf(stderr, " %s match_pids print\n", DF_BPF_NAME); } +static void javalog_help(void) +{ + fprintf(stderr, "Print java hs_err_pid*.log\n"); + fprintf(stderr, "Usage:\n" " %s javalog show\n", DF_BPF_NAME); + fprintf(stderr, "For example:\n"); + fprintf(stderr, " %s javalog show\n", DF_BPF_NAME); +} + static void cpdbg_help(void) { fprintf(stderr, @@ -182,6 +197,67 @@ static void datadump_help(void) DF_BPF_NAME); } +static long count_lines(const char *path) +{ + FILE *f = fopen(path, "r"); + if (!f) return -1; + + long lines = 0; + int c; + while ((c = fgetc(f)) != EOF) { + if (c == '\n') + lines++; + } + fclose(f); + return lines; +} + +static int ls_path(const char *pattern) +{ + glob_t g; + int ret = glob(pattern, GLOB_NOCHECK, NULL, &g); + if (ret != 0) return -1; + + for (size_t i = 0; i < g.gl_pathc; ++i) { + const char *path = g.gl_pathv[i]; + + struct stat st; + if (stat(path, &st) != 0) { + fprintf(stderr, "stat %s failed: %s\n", path, strerror(errno)); + continue; + } + + long lines = count_lines(path); + + printf("%s\n", path); + printf(" size : %ld bytes\n", (long)st.st_size); + + if (lines >= 0) + printf(" lines: %ld\n", lines); + else + printf(" lines: \n"); + } + + globfree(&g); + return 0; +} + +// rm_path("/var/log/datadump-*.log"); +static int rm_path(const char *pattern) +{ + glob_t g; + int ret = glob(pattern, GLOB_NOCHECK, NULL, &g); + if (ret != 0) return -1; + + for (size_t i = 0; i < g.gl_pathc; ++i) { + printf("unlink %s\n", g.gl_pathv[i]); + unlink(g.gl_pathv[i]); + } + + globfree(&g); + return 0; +} + static int __exec_command(const char *cmd, const char *args) { FILE *fp; @@ -635,6 +711,21 @@ static int match_pids_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd, return ETR_OK; } +static int javalog_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd, + struct df_bpf_conf *conf) +{ + switch (conf->cmd) { + case DF_BPF_CMD_SHOW: + __exec_command("cat /var/log/deepflow-agent/java-crash.log", ""); + break; + + default: + return ETR_NOTSUPP; + } + + return ETR_OK; +} + static int datadump_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd, struct df_bpf_conf *conf) { @@ -735,13 +826,13 @@ static int datadump_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd, if (conf->argc != 0) fprintf(stdout, "Invalid params.\n"); else - __exec_command("ls -sh /var/log/datadump-*.log", ""); + ls_path("/var/log/datadump-*.log"); break; case DF_BPF_CMD_FLUSH: if (conf->argc != 0) fprintf(stdout, "Invalid params.\n"); else - __exec_command("rm -rf /var/log/datadump-*.log", ""); + rm_path("/var/log/datadump-*.log"); break; case DF_BPF_CMD_FIND: { @@ -892,13 +983,19 @@ struct df_bpf_obj match_pids_obj = { .do_cmd = match_pids_do_cmd, }; +struct df_bpf_obj javalog_obj = { + .name = "javalog", + .help = javalog_help, + .do_cmd = javalog_do_cmd, +}; + static void usage(void) { fprintf(stderr, "Usage:\n" " " DF_BPF_NAME " [OPTIONS] OBJECT { COMMAND | help }\n" "Parameters:\n" - " OBJECT := { tracer socktrace datadump cpdbg match_pids}\n" + " OBJECT := { tracer socktrace datadump cpdbg match_pids javalog}\n" " COMMAND := { show list set print}\n" "Options:\n" " -v, --verbose\n" @@ -917,7 +1014,9 @@ static struct df_bpf_obj *df_bpf_obj_get(const char *name) return &cpdbg_obj; } else if (strcmp(name, "match_pids") == 0) { return &match_pids_obj; - } + } else if (strcmp(name, "javalog") == 0) { + return &javalog_obj; + } return NULL; } diff --git a/agent/src/ebpf/user/socket.c b/agent/src/ebpf/user/socket.c index 379f2161da9..205d1a2eb5d 100644 --- a/agent/src/ebpf/user/socket.c +++ b/agent/src/ebpf/user/socket.c @@ -1044,6 +1044,57 @@ static inline int get_additional_memory_size(struct __socket_data_buffer *buf) return extra_size; } +// ------------------------------------ java debug ------------------- +#define JAVA_LOG_PATH "/var/log/deepflow-agent/java-crash.log" +#include +static int log_fd = -1; +static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER; + +static int open_log_if_needed() +{ + if (log_fd != -1) return 0; + + const char *path = "/var/log/deepflow-agent"; + + if (access(path, F_OK) != 0) { + if (mkdir(path, 0755) != 0 && errno != EEXIST) { + perror("mkdir"); + } + } + + int fd = open(JAVA_LOG_PATH, + O_WRONLY | O_CREAT | O_APPEND, + 0644); + if (fd < 0) { + perror("open java log"); + return -1; + } + + log_fd = fd; + return 0; +} + + +void javalog_append(const char *buf, size_t len) +{ + pthread_mutex_lock(&log_lock); + + if (open_log_if_needed() != 0) { + pthread_mutex_unlock(&log_lock); + return; + } + + ssize_t ret = write(log_fd, buf, len); + if (ret < 0) { + perror("write java log"); + close(log_fd); + log_fd = -1; + } + + pthread_mutex_unlock(&log_lock); +} + +// ------------------------------------------------------------------- // Read datas from perf ring-buffer and dispatch. static void reader_raw_cb(void *cookie, void *raw, int raw_size) { @@ -1350,6 +1401,25 @@ static void reader_raw_cb(void *cookie, void *raw, int raw_size) } submit_data->syscall_len += offset; submit_data->cap_len = len + offset; + + if (sd->source == DATA_SOURCE_FILE_WRITE) { + char wr_tag[256]; + if (submit_data->syscall_len != submit_data->cap_len) { + int java_len = snprintf(wr_tag, sizeof(wr_tag), "\n### ==== PID %d syscall_len %d cap_len %d ====\n", sd->tgid, submit_data->syscall_len, submit_data->cap_len); + javalog_append(wr_tag, java_len); + } + // 23(#) 20( ) 41(A) 20( ) 66(f) 61(a) 74(t) 61(a) 6C(l) 20( ) 65(e) 72(r) 72(r) 6F(o) + // # A fatal error has + if (submit_data->cap_data[0] == '#' && submit_data->cap_data[1] == ' ' && submit_data->cap_data[2] == 'A' && submit_data->cap_data[3] == ' ' && submit_data->cap_data[4] == 'f' && + submit_data->cap_data[5] == 'a' && submit_data->cap_data[6] == 't' && submit_data->cap_data[7] == 'a' && submit_data->cap_data[8] == 'l' && submit_data->cap_data[9] == ' ') { + char *timestamp = gen_timestamp_str(0); + int java_len = snprintf(wr_tag, sizeof(wr_tag), "# %s containerID %s\n", timestamp, submit_data->container_id); + javalog_append(wr_tag, java_len); + free(timestamp); + } + javalog_append(submit_data->cap_data, submit_data->cap_len); + } + burst_data[i] = submit_data; start += From 81cddae019d859b36bdaadc895652fcdba88700f Mon Sep 17 00:00:00 2001 From: Jiping Yin Date: Sat, 27 Dec 2025 11:16:23 +0800 Subject: [PATCH 2/2] fix cpu cache data not push --- agent/src/ebpf/kernel/files_rw.bpf.c | 13 +++++---- agent/src/ebpf/user/ctrl_tracer.c | 41 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/agent/src/ebpf/kernel/files_rw.bpf.c b/agent/src/ebpf/kernel/files_rw.bpf.c index 508a38e6d5f..cd2f396a6b7 100644 --- a/agent/src/ebpf/kernel/files_rw.bpf.c +++ b/agent/src/ebpf/kernel/files_rw.bpf.c @@ -322,8 +322,8 @@ static __inline int trace_io_event_common(void *ctx, } buffer->bytes_count = data_args->bytes_count; - buffer->latency = latency; - buffer->operation = direction; + //buffer->latency = latency; + //buffer->operation = direction; struct __socket_data_buffer *v_buff = bpf_map_lookup_elem(&NAME(data_buf), &k0); if (!v_buff) @@ -343,7 +343,7 @@ static __inline int trace_io_event_common(void *ctx, v->fd = data_args->fd; v->tgid = tgid; v->pid = (__u32) pid_tgid; - v->coroutine_id = trace_key.goid; + //v->coroutine_id = trace_key.goid; v->timestamp = data_args->enter_ts; v->syscall_len = sizeof(*buffer); // hs_err_pid @@ -354,11 +354,12 @@ static __inline int trace_io_event_common(void *ctx, v->source = DATA_SOURCE_FILE_WRITE; v->syscall_len = data_args->bytes_count; } else { + __sync_fetch_and_add(&tracer_ctx->push_buffer_refcnt, -1); return 0; - v->source = DATA_SOURCE_IO_EVENT; - v->syscall_len = sizeof(*buffer); + //v->source = DATA_SOURCE_IO_EVENT; + //v->syscall_len = sizeof(*buffer); } - v->thread_trace_id = trace_id; + //v->thread_trace_id = trace_id; v->msg_type = MSG_COMMON; bpf_get_current_comm(v->comm, sizeof(v->comm)); bool is_vecs = false; diff --git a/agent/src/ebpf/user/ctrl_tracer.c b/agent/src/ebpf/user/ctrl_tracer.c index 6ea442a2e76..91d82a29a51 100644 --- a/agent/src/ebpf/user/ctrl_tracer.c +++ b/agent/src/ebpf/user/ctrl_tracer.c @@ -1223,8 +1223,49 @@ static int parse_args(int argc, char *argv[], struct df_bpf_conf *conf) return 0; } +#define LOG_PATH "/var/log/deepflow-agent/java-crash.log" + +int ensure_log_file(void) +{ + int fd; + + /* 先检查文件是否存在 */ + if (access(LOG_PATH, F_OK) == 0) { + return 0; // 已存在,什么都不做 + } + + /* 确保目录存在 */ + if (mkdir("/var/log/deepflow-agent", 0755) != 0 && errno != EEXIST) { + perror("mkdir"); + return -1; + } + + /* 创建文件(仅当不存在时创建) */ + fd = open(LOG_PATH, O_WRONLY | O_CREAT | O_EXCL, 0644); + if (fd < 0) { + if (errno == EEXIST) return 0; + perror("open"); + return -1; + } + + /* 写入初始内容 */ + const char *init = "-----\n"; + if (write(fd, init, strlen(init)) < 0) { + perror("write"); + close(fd); + return -1; + } + + close(fd); + return 0; +} + int main(int argc, char *argv[]) { + if (ensure_log_file() != 0) { + fprintf(stderr, "java log file failed\n"); + } + char *prog; struct df_bpf_conf conf; struct df_bpf_obj *obj;