TRACE-EVENT is a more generic way to define tracepoints. Doing so adds
these new capabilities to this tracepoint:
- zero-copy and per-cpu splice() tracing
- binary tracing without printf overhead
- structured logging records exposed under /debug/tracing/events
- trace events embedded in function tracer output and other plugins
- user-defined, per tracepoint filter expressions
...
Cons and problems:
- no dev-t info for the output of plug, unplug-timer and unplug-io events.
no dev-t info for getrq and sleeprq events if bio == NULL.
no dev-t info for rq-abort,...,rq-requeue events if rq->rq-disk == NULL.
- for large packet commands, only 16 bytes of the command will be output,
because TRACE-EVENT doesn't support dynamic-sized arrays, though it
supports dynamic-sized strings.
- a packet command is converted to a string in TP-assign, not TP-print,
whereas blktrace does the conversion just before output.
- in blktrace, an event can have 2 different print formats, but a TRACE-EVENT
has a unique format. (see the output of getrq and rq-insert)
Following are some comparisons between TRACE-EVENT and blktrace:
plug:
kjournald-480 [000] 303.084981: block-plug: [kjournald]
kjournald-480 [000] 303.084981: 8,0 P N [kjournald]
unplug-io:
kblockd/0-118 [000] 300.052973: block-unplug-io: [kblockd/0] 1
kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1
remap:
kjournald-480 [000] 303.085042: block-remap: 8,0 W 102736992 + 8 <- (8,8) 33384
kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384
bio-backmerge:
kjournald-480 [000] 303.085086: block-bio-backmerge: 8,0 W 102737032 + 8 [kjournald]
kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald]
getrq:
kjournald-480 [000] 303.084974: block-getrq: 8,0 W 102736984 + 8 [kjournald]
kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald]
bash-2066 [001] 1072.953770: 8,0 G N [bash]
bash-2066 [001] 1072.953773: block-getrq: 0,0 N 0 + 0 [bash]
rq-complete:
konsole-2065 [001] 300.053184: block-rq-complete: 8,0 W () 103669040 + 16 [0]
konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0]
rq-insert:
kjournald-480 [000] 303.084985: block-rq-insert: 8,0 W 0 () 102736984 + 8 [kjournald]
kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald]
ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0]
ksoftirqd/1-7 [001] 1072.953813: block-rq-complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0]
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
-diff #include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE-TRACE-POINTS
+#include <trace/events/block.h>
#include "blk.h"
-DEFINE-TRACE(block-plug);
-DEFINE-TRACE(block-unplug-io);
-DEFINE-TRACE(block-unplug-timer);
-DEFINE-TRACE(block-getrq);
-DEFINE-TRACE(block-sleeprq);
-DEFINE-TRACE(block-rq-requeue);
-DEFINE-TRACE(block-bio-backmerge);
-DEFINE-TRACE(block-bio-frontmerge);
-DEFINE-TRACE(block-bio-queue);
-DEFINE-TRACE(block-rq-complete);
-DEFINE-TRACE(block-remap); /* Also used in drivers/md/dm.c */
EXPORT-TRACEPOINT-SYMBOL-GPL(block-remap);
+EXPORT-TRACEPOINT-SYMBOL-GPL(block-bio-complete);
static int #include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace-api.h>
-#include <trace/block.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
+#include <trace/events/block.h>
+
#include "blk.h"
static DEFINE-SPINLOCK(elv-list-lock);
static LIST-HEAD(elv-list);
-DEFINE-TRACE(block-rq-abort);
-
/*
* Merge hash stuff.
*/
@@ -55,9 +54,6 @@ static const int elv-hash-shift = 6;
#define rq-hash-key(rq) ((rq)->sector + (rq)->nr-sectors)
#define ELV-ON-HASH(rq) (!hlist-unhashed(&(rq)->hash))
-DEFINE-TRACE(block-rq-insert);
-DEFINE-TRACE(block-rq-issue);
-
/*
* Query io scheduler to see if the current process issuing bio may be
* merged with rq.
diff #include <linux/blktrace-api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
#define DM-MSG-PREFIX "core"
@@ -53,8 +54,6 @@ struct dm-target-io {
union map-info info;
};
-DEFINE-TRACE(block-bio-complete);
-
/*
* For request-based dm.
* One of these is allocated per request.
diff #include <linux/blktrace-api.h>
-#include <trace/block.h>
#include <scsi/sg.h> /* for struct sg-iovec */
-DEFINE-TRACE(block-split);
+#include <trace/events/block.h>
/*
* Test patch to inline a certain number of bi-io-vec's inside the bio
diff
+#ifdef CONFIG-EVENT-TRACING
+
+extern void #endif
diff -#define -TRACE-BLOCK-H
-
-#include <linux/blkdev.h>
-#include <linux/tracepoint.h>
-
-DECLARE-TRACE(block-rq-abort,
- TP-PROTO(struct request-queue *q, struct request *rq),
- TP-ARGS(q, rq));
-
-DECLARE-TRACE(block-rq-insert,
- TP-PROTO(struct request-queue *q, struct request *rq),
- TP-ARGS(q, rq));
-
-DECLARE-TRACE(block-rq-issue,
- TP-PROTO(struct request-queue *q, struct request *rq),
- TP-ARGS(q, rq));
-
-DECLARE-TRACE(block-rq-requeue,
- TP-PROTO(struct request-queue *q, struct request *rq),
- TP-ARGS(q, rq));
-
-DECLARE-TRACE(block-rq-complete,
- TP-PROTO(struct request-queue *q, struct request *rq),
- TP-ARGS(q, rq));
-
-DECLARE-TRACE(block-bio-bounce,
- TP-PROTO(struct request-queue *q, struct bio *bio),
- TP-ARGS(q, bio));
-
-DECLARE-TRACE(block-bio-complete,
- TP-PROTO(struct request-queue *q, struct bio *bio),
- TP-ARGS(q, bio));
-
-DECLARE-TRACE(block-bio-backmerge,
- TP-PROTO(struct request-queue *q, struct bio *bio),
- TP-ARGS(q, bio));
-
-DECLARE-TRACE(block-bio-frontmerge,
- TP-PROTO(struct request-queue *q, struct bio *bio),
- TP-ARGS(q, bio));
-
-DECLARE-TRACE(block-bio-queue,
- TP-PROTO(struct request-queue *q, struct bio *bio),
- TP-ARGS(q, bio));
-
-DECLARE-TRACE(block-getrq,
- TP-PROTO(struct request-queue *q, struct bio *bio, int rw),
- TP-ARGS(q, bio, rw));
-
-DECLARE-TRACE(block-sleeprq,
- TP-PROTO(struct request-queue *q, struct bio *bio, int rw),
- TP-ARGS(q, bio, rw));
-
-DECLARE-TRACE(block-plug,
- TP-PROTO(struct request-queue *q),
- TP-ARGS(q));
-
-DECLARE-TRACE(block-unplug-timer,
- TP-PROTO(struct request-queue *q),
- TP-ARGS(q));
-
-DECLARE-TRACE(block-unplug-io,
- TP-PROTO(struct request-queue *q),
- TP-ARGS(q));
-
-DECLARE-TRACE(block-split,
- TP-PROTO(struct request-queue *q, struct bio *bio, unsigned int pdu),
- TP-ARGS(q, bio, pdu));
-
-DECLARE-TRACE(block-remap,
- TP-PROTO(struct request-queue *q, struct bio *bio, dev-t dev,
- sector-t from),
- TP-ARGS(q, bio, dev, from));
-
-#endif
diff +#define -TRACE-BLOCK-H
+
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE-SYSTEM
+#define TRACE-SYSTEM block
+
+TRACE-EVENT(block-rq-abort,
+
+ TP-PROTO(struct request-queue *q, struct request *rq),
+
+ TP-ARGS(q, rq),
+
+ TP-STRUCT+ ),
+
+ TP-fast-assign(
+ + ),
+
+ TP-printk("%d,%d %s (%s) %llu + %u [%d]",
+ MAJOR(+ TP-PROTO(struct request-queue *q, struct request *rq),
+
+ TP-ARGS(q, rq),
+
+ TP-STRUCT+ + 0 : rq->hard-nr-sectors;
+ + MAJOR(+ TP-PROTO(struct request-queue *q, struct request *rq),
+
+ TP-ARGS(q, rq),
+
+ TP-STRUCT+ + 0 : rq->hard-nr-sectors;
+ + MAJOR(+ TP-PROTO(struct request-queue *q, struct request *rq),
+
+ TP-ARGS(q, rq),
+
+ TP-STRUCT+ ),
+
+ TP-fast-assign(
+ +
+ TP-printk("%d,%d %s (%s) %llu + %u [%d]",
+ MAJOR(+ TP-PROTO(struct request-queue *q, struct request *rq),
+
+ TP-ARGS(q, rq),
+
+ TP-STRUCT+ ),
+
+ TP-fast-assign(
+ + ),
+
+ TP-printk("%d,%d %s (%s) %llu + %u [%d]",
+ MAJOR(+
+ TP-ARGS(q, bio),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ + TP-printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(+
+ TP-ARGS(q, bio),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ + MAJOR(+
+ TP-ARGS(q, bio),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ + TP-printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(+
+ TP-ARGS(q, bio),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ + TP-printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(+
+ TP-ARGS(q, bio),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ + TP-printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(+
+ TP-ARGS(q, bio, rw),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ +
+ TP-printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(+
+ TP-ARGS(q, bio, rw),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ +
+ TP-printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(+
+ TP-ARGS(q),
+
+ TP-STRUCT+
+ TP-printk("[%s]", + TP-ARGS(q),
+
+ TP-STRUCT+ memcpy(+
+ TP-PROTO(struct request-queue *q),
+
+ TP-ARGS(q),
+
+ TP-STRUCT+ memcpy(+
+ TP-PROTO(struct request-queue *q, struct bio *bio,
+ unsigned int new-sector),
+
+ TP-ARGS(q, bio, new-sector),
+
+ TP-STRUCT+
+ TP-fast-assign(
+ + TP-printk("%d,%d %s %llu / %llu [%s]",
+ MAJOR(+ sector-t from),
+
+ TP-ARGS(q, bio, dev, from),
+
+ TP-STRUCT+ ),
+
+ TP-fast-assign(
+ +
+ TP-printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+ MAJOR(+
+/* This part must be outside protection */
+#include <trace/define-trace.h>
+
diff obj-$(CONFIG-WORKQUEUE-TRACER) += trace-workqueue.o
-obj-$(CONFIG-BLK-DEV-IO-TRACE) += blktrace.o
+obj-$(CONFIG-BLK-DEV-IO-TRACE) += blktrace.o
+ifeq ($(CONFIG-BLOCK),y)
+obj-$(CONFIG-EVENT-TRACING) += blktrace.o
+endif
obj-$(CONFIG-EVENT-TRACING) += trace-events.o
obj-$(CONFIG-EVENT-TRACING) += trace-export.o
obj-$(CONFIG-FTRACE-SYSCALLS) += trace-syscalls.o
diff #include <linux/time.h>
-#include <trace/block.h>
#include <linux/uaccess.h>
+
+#include <trace/events/block.h>
+
#include "trace-output.h"
+#ifdef CONFIG-BLK-DEV-IO-TRACE
+
static unsigned int blktrace-seq +#endif /* CONFIG-BLK-DEV-IO-TRACE */
+
+#ifdef CONFIG-EVENT-TRACING
+
+void + buf[0] = ' ';
+ return;
+ }
+
+ for (end = len - 1; end >= 0; end+ if (i == end && end != len - 1) {
+ sprintf(buf, " ..");
+ break;
+ }
+ }
+}
+
+void + rwbs[i++] = 'D';
+ else if (bytes)
+ rwbs[i++] = 'R';
+ else
+ rwbs[i++] = 'N';
+
+ if (rw & 1 << BIO-RW-AHEAD)
+ rwbs[i++] = 'A';
+ if (rw & 1 << BIO-RW-BARRIER)
+ rwbs[i++] = 'B';
+ if (rw & 1 << BIO-RW-SYNCIO)
+ rwbs[i++] = 'S';
+ if (rw & 1 << BIO-RW-META)
+ rwbs[i++] = 'M';
+
+ rwbs[i] = ' ';
+}
+
+void +
+ if (blk-pc-request(rq))
+ bytes = rq->data-len;
+ else
+ bytes = rq->hard-nr-sectors << 9;
+
+ #include <asm/tlbflush.h>
+#include <trace/events/block.h>
+
#define POOL-SIZE 64
#define ISA-POOL-SIZE 16
static mempool-t *page-pool, *isa-page-pool;
-DEFINE-TRACE(block-bio-bounce);
-
#ifdef CONFIG-HIGHMEM
static Please read the FAQ at http://www.tux.org/lkml/
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Ingo Molnar on
2009-05-18T08:40:59+00:00
* Li Zefan <lizf@cn.fujitsu.com> wrote:
> TRACE-EVENT is a more generic way to define tracepoints. Doing so adds
> these new capabilities to this tracepoint:
>
> - zero-copy and per-cpu splice() tracing
> - binary tracing without printf overhead
> - structured logging records exposed under /debug/tracing/events
> - trace events embedded in function tracer output and other plugins
> - user-defined, per tracepoint filter expressions
> ...
Nice!
> Cons and problems:
>
> - no dev-t info for the output of plug, unplug-timer and unplug-io events.
> no dev-t info for getrq and sleeprq events if bio == NULL.
> no dev-t info for rq-abort,...,rq-requeue events if rq->rq-disk == NULL.
Cannot we output the numeric major:minor pairs?
> - for large packet commands, only 16 bytes of the command will be output.
> Because TRACE-EVENT doesn't support dynamic-sized arrays, though it
> supports dynamic-sized strings.
>
> - a packet command is converted to a string in TP-assign, not TP-print.
> While blktrace do the convertion just before output.
Couldn't we do a memcpy instead of the snprintf(), so that in the common case we just return early without any snprintf overhead?
> - in blktrace, an event can have 2 different print formats, but
> a TRACE-EVENT has a unique format. (see the output of getrq
> and rq-insert)
Is this a problem?
I think a good way forward would be to benchmark the ioctl versus
the splice based TRACE-EVENT tracing (via some artificially high
rate event, to push things), and see where we are right now in terms
of overhead.
Ingo
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Frederic Weisbecker on
2009-05-18T13:05:49+00:00
On Thu, May 14, 2009 at 02:20:03PM +0800, Li Zefan wrote:
> TRACE-EVENT is a more generic way to define tracepoints. Doing so adds
> these new capabilities to this tracepoint:
>
> - zero-copy and per-cpu splice() tracing
> - binary tracing without printf overhead
> - structured logging records exposed under /debug/tracing/events
> - trace events embedded in function tracer output and other plugins
> - user-defined, per tracepoint filter expressions
> ...
>
> Cons and problems:
>
> - no dev-t info for the output of plug, unplug-timer and unplug-io events.
> no dev-t info for getrq and sleeprq events if bio == NULL.
> no dev-t info for rq-abort,...,rq-requeue events if rq->rq-disk == NULL.
>
> - for large packet commands, only 16 bytes of the command will be output.
> Because TRACE-EVENT doesn't support dynamic-sized arrays, though it
> supports dynamic-sized strings.
>
> - a packet command is converted to a string in TP-assign, not TP-print.
> While blktrace do the convertion just before output.
>
> - in blktrace, an event can have 2 different print formats, but a TRACE-EVENT
> has a unique format. (see the output of getrq and rq-insert)
I'm starting to think it would be nice to choose between several outputs
in a trace event.
I.e., perhaps we need a kind of per-event flag, something simple, just to
choose between several TP-printk outputs. Not sure how (non-)trivial it
would be to implement, though...
Frederic.
>
> Following are some comparisons between TRACE-EVENT and blktrace:
>
> plug:
> kjournald-480 [000] 303.084981: block-plug: [kjournald]
> kjournald-480 [000] 303.084981: 8,0 P N [kjournald]
>
> unplug-io:
> kblockd/0-118 [000] 300.052973: block-unplug-io: [kblockd/0] 1
> kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1
>
> remap:
> kjournald-480 [000] 303.085042: block-remap: 8,0 W 102736992 + 8 <- (8,8) 33384
> kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384
>
> bio-backmerge:
> kjournald-480 [000] 303.085086: block-bio-backmerge: 8,0 W 102737032 + 8 [kjournald]
> kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald]
>
> getrq:
> kjournald-480 [000] 303.084974: block-getrq: 8,0 W 102736984 + 8 [kjournald]
> kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald]
>
> bash-2066 [001] 1072.953770: 8,0 G N [bash]
> bash-2066 [001] 1072.953773: block-getrq: 0,0 N 0 + 0 [bash]
>
> rq-complete:
> konsole-2065 [001] 300.053184: block-rq-complete: 8,0 W () 103669040 + 16 [0]
> konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0]
>
> rq-insert:
> kjournald-480 [000] 303.084985: block-rq-insert: 8,0 W 0 () 102736984 + 8 [kjournald]
> kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald]
>
> ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0]
> ksoftirqd/1-7 [001] 1072.953813: block-rq-complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0]
>
> Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
>
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Li Zefan on
2009-05-19T06:03:57+00:00
Ingo Molnar wrote:
> * Li Zefan <lizf@cn.fujitsu.com> wrote:
>
>> TRACE-EVENT is a more generic way to define tracepoints. Doing so adds
>> these new capabilities to this tracepoint:
>>
>> - zero-copy and per-cpu splice() tracing
>> - binary tracing without printf overhead
>> - structured logging records exposed under /debug/tracing/events
>> - trace events embedded in function tracer output and other plugins
>> - user-defined, per tracepoint filter expressions
>> ...
>
> Nice!
>
>> Cons and problems:
>>
>> - no dev-t info for the output of plug, unplug-timer and unplug-io events.
>> no dev-t info for getrq and sleeprq events if bio == NULL.
>> no dev-t info for rq-abort,...,rq-requeue events if rq->rq-disk == NULL.
>
> Cannot we output the numeric major:minor pairs?
>
No, we can't.
Take plug tracepoint for example, the only argument is a struct request-queue,
but we can't map from a queue to a device, since there is no 1:1 mapping.
That's why blktrace adds dev-t info in struct blk-trace, which is associated
to a queue.
>> - for large packet commands, only 16 bytes of the command will be output.
>> Because TRACE-EVENT doesn't support dynamic-sized arrays, though it
>> supports dynamic-sized strings.
>>
>> - a packet command is converted to a string in TP-assign, not TP-print.
>> While blktrace do the convertion just before output.
>
> Couldnt we do a memcpy instead of the snprintf() in on Right. :)
>> - in blktrace, an event can have 2 different print formats, but
>> a TRACE-EVENT has a unique format. (see the output of getrq
>> and rq-insert)
>
> Is this a problem?
>
One of the defect is, we have > the splice based TRACE-EVENT tracing (via some artificially high
> rate event, to push things), and see where we are right now in terms
> of overhead.
>
I'll try.
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Li Zefan on
2009-05-19T06:11:06+00:00
Frederic Weisbecker wrote:
> On Thu, May 14, 2009 at 02:20:03PM +0800, Li Zefan wrote:
>> TRACE-EVENT is a more generic way to define tracepoints. Doing so adds
>> these new capabilities to this tracepoint:
>>
>> - zero-copy and per-cpu splice() tracing
>> - binary tracing without printf overhead
>> - structured logging records exposed under /debug/tracing/events
>> - trace events embedded in function tracer output and other plugins
>> - user-defined, per tracepoint filter expressions
>> ...
>>
>> Cons and problems:
>>
>> - no dev-t info for the output of plug, unplug-timer and unplug-io events.
>> no dev-t info for getrq and sleeprq events if bio == NULL.
>> no dev-t info for rq-abort,...,rq-requeue events if rq->rq-disk == NULL.
>>
>> - for large packet commands, only 16 bytes of the command will be output.
>> Because TRACE-EVENT doesn't support dynamic-sized arrays, though it
>> supports dynamic-sized strings.
>>
>> - a packet command is converted to a string in TP-assign, not TP-print.
>> While blktrace do the convertion just before output.
>>
>> - in blktrace, an event can have 2 different print formats, but a TRACE-EVENT
>> has a unique format. (see the output of getrq and rq-insert)
>
> I'm starting to think it would be nice to choose between several outputs
> in a trace event.
> Ie: perhaps we need a kind of per event flag, something simple, just to
> choose between several TP-printk output. Not sure how much it would
> (non) trivial to implement though...
>
If a trace event wants several TP-printk outputs, it probably also wants
different trace entry structs; otherwise we'll be wasting memory.
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Jeff Moyer on
2009-05-19T13:00:39+00:00
Li Zefan <lizf@cn.fujitsu.com> writes:
> TRACE-EVENT is a more generic way to define tracepoints. Doing so adds
> these new capabilities to this tracepoint:
> - zero-copy and per-cpu splice() tracing
> - binary tracing without printf overhead
> - structured logging records exposed under /debug/tracing/events
> - trace events embedded in function tracer output and other plugins
> - user-defined, per tracepoint filter expressions
> ...
>
> Cons and problems:
>
> - no dev-t info for the output of plug, unplug-timer and unplug-io events.
I'm not sure I'm comfortable with this. Having had to diagnose
performance problems using blktrace, I find the dev-t information for
plugs and unplugs especially interesting. Maybe we can still figure
this out given the context in the trace, though. Can you comment on
that?
> no dev-t info for getrq and sleeprq events if bio == NULL.
> no dev-t info for rq-abort,...,rq-requeue events if rq->rq-disk == NULL.
>
> - for large packet commands, only 16 bytes of the command will be output.
> Because TRACE-EVENT doesn't support dynamic-sized arrays, though it
> supports dynamic-sized strings.
>
> - a packet command is converted to a string in TP-assign, not TP-print.
> While blktrace do the convertion just before output.
>
> - in blktrace, an event can have 2 different print formats, but a TRACE-EVENT
> has a unique format. (see the output of getrq and rq-insert)
>
>
> Following are some comparisons between TRACE-EVENT and blktrace:
>
> plug:
> kjournald-480 [000] 303.084981: block-plug: [kjournald]
> kjournald-480 [000] 303.084981: 8,0 P N [kjournald]
I'm not a big fan of having to change my scripts! Why did you switch
from the single letter output to the verbose strings? Am I just being a
curmudgeon?
I have one further question, which will expose my ignorance on this
implementation. Would the proposed patch change the information
available to the blktrace utility, or is this just for users such as
systemtap?
Cheers,
Jeff
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Christoph Hellwig on
2009-05-19T13:10:20+00:00
On Tue, May 19, 2009 at 08:59:29AM -0400, Jeff Moyer wrote:
> I'm not sure I'm comfortable with this. Having had to diagnose
> performance problems using blktrace, I find the dev-t information for
> plugs and unplugs especially interesting. Maybe we can still figure
> this out given the context in the trace, though. Can you comment on
> that?
The problem is that right now we still allow request queues shared by
multiple gendisks, so we can't get at the gendisk. The blktrace ioctl
interface gets around that by doing the setup with an ioctl on the
block device node and storing the dev-t at that point.
But I don't think we actually have any drivers left sharing a queue
that way, and I'd love to get rid of that for various reasons.
> from the single letter output to the verbose strings? Am I just being a
> curmudgeon?
>
> I have one further question, which will expose my ignorance on this
> implementation. Would the proposed patch change the information
> available to the blktrace utility,
That's the output from /sys/kernel/debug/tracing/trace when the
block events are enabled. The blktrace utility should continue to
work without changes, although it could be ported to use the ftrace
binary ring buffer directly in the future.
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by FUJITA Tomonori on
2009-05-19T15:57:35+00:00
On Tue, 19 May 2009 09:08:41 -0400
Christoph Hellwig <hch@infradead.org> wrote:
> On Tue, May 19, 2009 at 08:59:29AM -0400, Jeff Moyer wrote:
> > I'm not sure I'm comfortable with this. Having had to diagnose
> > performance problems using blktrace, I find the dev-t information for
> > plugs and unplugs especially interesting. Maybe we can still figure
> > this out given the context in the trace, though. Can you comment on
> > that?
>
> The problem is that right now we still allow request queues shared by
> multiple gendisks, so we can't get at the gendisk. The blktrace ioctl
> interface gets around that by doing the setup with an ioctl on the
> block device node and storing the dev-t at that point.
>
> But I don't think we actually have any drivers left sharing a queue
> that way, and I'd love to get rid of that for various reasons.
There are still some (e.g. drivers/block/floppy.c)? I thought that we
hit this when we tried to add the command filter feature, though I
might be wrong (I can't recall).
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Jens Axboe on
2009-05-19T17:34:31+00:00
On Wed, May 20 2009, FUJITA Tomonori wrote:
> On Tue, 19 May 2009 09:08:41 -0400
> Christoph Hellwig <hch@infradead.org> wrote:
>
> > On Tue, May 19, 2009 at 08:59:29AM -0400, Jeff Moyer wrote:
> > > I'm not sure I'm comfortable with this. Having had to diagnose
> > > performance problems using blktrace, I find the dev-t information for
> > > plugs and unplugs especially interesting. Maybe we can still figure
> > > this out given the context in the trace, though. Can you comment on
> > > that?
> >
> > The problem is that right now we still allow request queues shared by
> > multiple gendisks, so we can't get at the gendisk. The blktrace ioctl
> > interface gets around that by doing the setup with an ioctl on the
> > block device node and storing the dev-t at that point.
> >
> > But I don't think we actually have any drivers left sharing a queue
> > that way, and I'd love to get rid of that for various reasons.
>
> There are still some (e.g. drivers/block/floppy.c)? I though that we
> hit this when we tried to add the command filter feature though I
> might be wrong (I can't recall).
And mtd, iirc. But only a few. I'll fiddle up a patch to finally get rid
of this, it has been pending for... years.
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Li Zefan on
2009-05-20T08:38:14+00:00
Christoph Hellwig wrote:
> On Tue, May 19, 2009 at 08:59:29AM -0400, Jeff Moyer wrote:
>> I'm not sure I'm comfortable with this. Having had to diagnose
>> performance problems using blktrace, I find the dev-t information for
>> plugs and unplugs especially interesting. Maybe we can still figure
>> this out given the context in the trace, though. Can you comment on
>> that?
>
> The problem is that right now we still allow request queues shared by
> multiple gendisks, so we can't get at the gendisk. The blktrace ioctl
> interface gets around that by doing the setup with an ioctl on the
> block device node and storing the dev-t at that point.
>
> But I don't think we actually have any drivers left sharing a queue
> that way, and I'd love to get rid of that for various reasons.
>
It will be great if this can be done.
>> from the single letter output to the verbose strings? Am I just being a
>> curmudgeon?
>>
>> I have one further question, which will expose my ignorance on this
>> implementation. Would the proposed patch change the information
>> available to the blktrace utility,
>
> That's the output from /sys/kernel/debug/tracing/trace when the
> block events are enabled. The blktrace utility should continue to
> work without changes, although it could be ported to use the ftrace
> binary ring buffer directly in the future.
>
Yes, this patch doesn't affect ioctl-based blktrace or ftrace-plugin
blktrace at all.
To use blktracer:
# echo 1 > /sys/block/sda/trace/enable
# echo blk > /debug/tracing/current-tracer
# cat /debug/tracing/trace-pipe
To use block trace events:
# echo 'block:*' > /debug/tracing/set-event
# cat /trace-pipe
You can use blktracer and trace events together to see how they differ
in the output.
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Christoph Hellwig on
2009-05-23T12:40:58+00:00
On Wed, May 20, 2009 at 04:38:54PM +0800, Li Zefan wrote:
> Yes, this patch doesn't affect ioctl-based blktrace or ftrace-plugin
> blktrace at all.
Is there any good reason for keeping the ftrace plugin once we have
fully functional event tracer support for the block events?
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by KOSAKI Motohiro on
2009-05-24T05:16:31+00:00
> On Wed, May 20, 2009 at 04:38:54PM +0800, Li Zefan wrote:
> > Yes, this patch doesn't affect ioctl-based blktrace or ftrace-plugin
> > blktrace at all.
>
> Is there any good reason for keeping the ftrace plugin once we have
> the fully function event tracer support for the block events?
Waiting for the userland blktrace tool to be converted to use the event tracer?
Just a guess :-)
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by Christoph Hellwig on
2009-05-24T08:57:40+00:00
On Sun, May 24, 2009 at 02:15:05PM +0900, KOSAKI Motohiro wrote:
> > Is there any good reason for keeping the ftrace plugin once we have
> > the fully function event tracer support for the block events?
>
> waiting userland blktrace tool convert using event tracer?
blktrace(8) uses the ioctl interface, which we should keep for a long
time for backwards compatibility.
Re: RFC - PATCH - convert block trace points to TRACE_EVENT() by KOSAKI Motohiro on
2009-05-24T13:48:50+00:00
> On Sun, May 24, 2009 at 02:15:05PM +0900, KOSAKI Motohiro wrote:
> > > Is there any good reason for keeping the ftrace plugin once we have
> > > the fully function event tracer support for the block events?
> >
> > waiting userland blktrace tool convert using event tracer?
>
> blktrace(8) uses the ioctl interface, which we should keep for a long
> time for backwards compatiblity.
Ah, I misunderstood your point, sorry.
I thought you were suggesting removing the ioctl interface, but it's the reverse. OK. :)
So I guess keeping the same formatting would make the output easier for administrators to understand, maybe.