diff --git a/BUILD b/BUILD
index e0b14b5b7502a04bdf16bd3a12a8e3c369006d0a..df082e47787dc909e98f7b7c8a343db13d343a7f 100644
--- a/BUILD
+++ b/BUILD
@@ -296,7 +296,8 @@ cc_library(
     "src/core/json/json_reader.c",
     "src/core/json/json_string.c",
     "src/core/json/json_writer.c",
-    "src/core/profiling/timers.c",
+    "src/core/profiling/basic_timers.c",
+    "src/core/profiling/stap_timers.c",
     "src/core/statistics/census_init.c",
     "src/core/statistics/census_log.c",
     "src/core/statistics/census_rpc_stats.c",
@@ -503,7 +504,8 @@ cc_library(
     "src/core/json/json_reader.c",
     "src/core/json/json_string.c",
     "src/core/json/json_writer.c",
-    "src/core/profiling/timers.c",
+    "src/core/profiling/basic_timers.c",
+    "src/core/profiling/stap_timers.c",
     "src/core/statistics/census_init.c",
     "src/core/statistics/census_log.c",
     "src/core/statistics/census_rpc_stats.c",
diff --git a/Makefile b/Makefile
index 991bed3b428329e963f16beb819ebd25a9a324cc..bc3fc676f4bbc26ccb442f507e3f380616dde3a3 100644
--- a/Makefile
+++ b/Makefile
@@ -87,14 +87,23 @@ CPPFLAGS_opt = -O2
 LDFLAGS_opt =
 DEFINES_opt = NDEBUG
 
-VALID_CONFIG_latprof = 1
-CC_latprof = $(DEFAULT_CC)
-CXX_latprof = $(DEFAULT_CXX)
-LD_latprof = $(DEFAULT_CC)
-LDXX_latprof = $(DEFAULT_CXX)
-CPPFLAGS_latprof = -O2 -DGRPC_LATENCY_PROFILER
-LDFLAGS_latprof =
-DEFINES_latprof = NDEBUG
+VALID_CONFIG_basicprof = 1
+CC_basicprof = $(DEFAULT_CC)
+CXX_basicprof = $(DEFAULT_CXX)
+LD_basicprof = $(DEFAULT_CC)
+LDXX_basicprof = $(DEFAULT_CXX)
+CPPFLAGS_basicprof = -O2 -DGRPC_BASIC_PROFILER
+LDFLAGS_basicprof =
+DEFINES_basicprof = NDEBUG
+
+VALID_CONFIG_stapprof = 1
+CC_stapprof = $(DEFAULT_CC)
+CXX_stapprof = $(DEFAULT_CXX)
+LD_stapprof = $(DEFAULT_CC)
+LDXX_stapprof = $(DEFAULT_CXX)
+CPPFLAGS_stapprof = -O2 -DGRPC_STAP_PROFILER
+LDFLAGS_stapprof =
+DEFINES_stapprof = NDEBUG
 
 VALID_CONFIG_dbg = 1
 CC_dbg = $(DEFAULT_CC)
@@ -183,6 +192,7 @@ DEFINES_gcov = _DEBUG DEBUG
 prefix ?= /usr/local
 
 PROTOC = protoc
+DTRACE = dtrace
 CONFIG ?= opt
 CC = $(CC_$(CONFIG))
 CXX = $(CXX_$(CONFIG))
@@ -350,6 +360,8 @@ PERFTOOLS_CHECK_CMD = $(CC) $(CFLAGS) $(CPPFLAGS) -o $(TMPOUT) test/build/perfto
 PROTOBUF_CHECK_CMD = $(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $(TMPOUT) test/build/protobuf.cc -lprotobuf $(LDFLAGS)
 PROTOC_CHECK_CMD = which protoc > /dev/null
 PROTOC_CHECK_VERSION_CMD = protoc --version | grep -q libprotoc.3
+DTRACE_CHECK_CMD = which dtrace > /dev/null
+SYSTEMTAP_HEADERS_CHECK_CMD = $(CC) $(CFLAGS) $(CPPFLAGS) -o $(TMPOUT) test/build/systemtap.c $(LDFLAGS)
 
 ifeq ($(OPENSSL_REQUIRES_DL),true)
 OPENSSL_ALPN_CHECK_CMD += -ldl
@@ -382,6 +394,18 @@ else
 HAS_VALID_PROTOC = false
 endif
 
+# Check for Systemtap (https://sourceware.org/systemtap/), first by making sure <sys/sdt.h> is present
+# in the system and secondly by checking for the "dtrace" binary (on Linux, this is part of the Systemtap
+# distribution. It's part of the base system on BSD/Solaris machines).
+HAS_SYSTEMTAP_HEADERS = $(shell $(SYSTEMTAP_HEADERS_CHECK_CMD) 2> /dev/null && echo true || echo false)
+HAS_DTRACE = $(shell $(DTRACE_CHECK_CMD) 2> /dev/null && echo true || echo false)
+HAS_SYSTEMTAP = false
+ifeq ($(HAS_SYSTEMTAP_HEADERS),true)
+ifeq ($(HAS_DTRACE),true)
+HAS_SYSTEMTAP = true
+endif
+endif
+
 ifeq ($(wildcard third_party/openssl/ssl/ssl.h),)
 HAS_EMBEDDED_OPENSSL_ALPN = false
 else
@@ -549,6 +573,17 @@ protoc_dep_message:
 	@echo "  make run_dep_checks"
 	@echo
 
+systemtap_dep_error:
+	@echo
+	@echo "DEPENDENCY ERROR"
+	@echo
+	@echo "Under the '$(CONFIG)' configutation, the target you are trying "
+	@echo "to build requires systemtap 2.7+ (on Linux) or dtrace (on other "
+	@echo "platforms such as Solaris and *BSD). "
+	@echo
+	@echo "Please consult INSTALL to get more information."
+	@echo
+
 stop:
 	@false
 
@@ -2779,6 +2814,18 @@ $(GENDIR)/test/proto/test.grpc.pb.cc: test/proto/test.proto $(PROTOBUF_DEP) $(PR
 endif
 
 
+ifeq ($(CONFIG),stapprof)
+src/core/profiling/stap_timers.c: $(GENDIR)/src/core/profiling/stap_probes.h
+ifeq ($(HAS_SYSTEMTAP),true)
+$(GENDIR)/src/core/profiling/stap_probes.h: src/core/profiling/stap_probes.d
+	$(E) "[DTRACE]  Compiling $<"
+	$(Q) mkdir -p `dirname $@`
+	$(Q) $(DTRACE) -C -h -s $< -o $@
+else
+$(GENDIR)/src/core/profiling/stap_probes.h: systemtap_dep_error stop
+endif
+endif
+
 $(OBJDIR)/$(CONFIG)/%.o : %.c
 	$(E) "[C]       Compiling $<"
 	$(Q) mkdir -p `dirname $@`
@@ -2799,7 +2846,6 @@ $(OBJDIR)/$(CONFIG)/%.o : %.cc
 	$(Q) mkdir -p `dirname $@`
 	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -MMD -MF $(addsuffix .dep, $(basename $@)) -c -o $@ $<
 
-
 install: install_c install_cxx install-plugins install-certs verify-install
 
 install_c: install-headers_c install-static_c install-shared_c
@@ -3196,7 +3242,8 @@ LIBGRPC_SRC = \
     src/core/json/json_reader.c \
     src/core/json/json_string.c \
     src/core/json/json_writer.c \
-    src/core/profiling/timers.c \
+    src/core/profiling/basic_timers.c \
+    src/core/profiling/stap_timers.c \
     src/core/statistics/census_init.c \
     src/core/statistics/census_log.c \
     src/core/statistics/census_rpc_stats.c \
@@ -3444,7 +3491,8 @@ LIBGRPC_UNSECURE_SRC = \
     src/core/json/json_reader.c \
     src/core/json/json_string.c \
     src/core/json/json_writer.c \
-    src/core/profiling/timers.c \
+    src/core/profiling/basic_timers.c \
+    src/core/profiling/stap_timers.c \
     src/core/statistics/census_init.c \
     src/core/statistics/census_log.c \
     src/core/statistics/census_rpc_stats.c \
diff --git a/build.json b/build.json
index 5a33a6551810b5a4037ec1cd35f59dae98ce9b8e..d682c02bc4b2cdd8cbfca017636c3d2c58183f86 100644
--- a/build.json
+++ b/build.json
@@ -231,7 +231,8 @@
         "src/core/json/json_reader.c",
         "src/core/json/json_string.c",
         "src/core/json/json_writer.c",
-        "src/core/profiling/timers.c",
+        "src/core/profiling/basic_timers.c",
+        "src/core/profiling/stap_timers.c",
         "src/core/statistics/census_init.c",
         "src/core/statistics/census_log.c",
         "src/core/statistics/census_rpc_stats.c",
diff --git a/src/core/iomgr/pollset_posix.c b/src/core/iomgr/pollset_posix.c
index 60d0dad6d87bc7f34c234e6b9e89f7bdee6cd350..4d1bcad9e2707be00f05d578cc0f7f811784ae10 100644
--- a/src/core/iomgr/pollset_posix.c
+++ b/src/core/iomgr/pollset_posix.c
@@ -411,7 +411,7 @@ static int unary_poll_pollset_maybe_work(grpc_pollset *pollset,
   pfd[1].events = grpc_fd_begin_poll(fd, pollset, POLLIN, POLLOUT, &fd_watcher);
 
   r = poll(pfd, GPR_ARRAY_SIZE(pfd), timeout);
-  GRPC_TIMER_MARK(POLL_FINISHED, r);
+  GRPC_TIMER_MARK(GRPC_PTAG_POLL_FINISHED, r);
 
   grpc_fd_end_poll(&fd_watcher);
 
diff --git a/src/core/iomgr/tcp_posix.c b/src/core/iomgr/tcp_posix.c
index 86721e9c9552c778ffc01f4742ae3d9a5c0d75b2..40897fb8f86494a5f12b2776989637d1a10cedee 100644
--- a/src/core/iomgr/tcp_posix.c
+++ b/src/core/iomgr/tcp_posix.c
@@ -327,7 +327,7 @@ static void grpc_tcp_handle_read(void *arg /* grpc_tcp */, int success) {
   gpr_slice *final_slices;
   size_t final_nslices;
 
-  GRPC_TIMER_MARK(HANDLE_READ_BEGIN, 0);
+  GRPC_TIMER_BEGIN(GRPC_PTAG_HANDLE_READ, 0);
   slice_state_init(&read_state, static_read_slices, INLINE_SLICE_BUFFER_SIZE,
                    0);
 
@@ -350,11 +350,11 @@ static void grpc_tcp_handle_read(void *arg /* grpc_tcp */, int success) {
     msg.msg_controllen = 0;
     msg.msg_flags = 0;
 
-    GRPC_TIMER_MARK(RECVMSG_BEGIN, 0);
+    GRPC_TIMER_BEGIN(GRPC_PTAG_RECVMSG, 0);
     do {
       read_bytes = recvmsg(tcp->fd, &msg, 0);
     } while (read_bytes < 0 && errno == EINTR);
-    GRPC_TIMER_MARK(RECVMSG_END, 0);
+    GRPC_TIMER_END(GRPC_PTAG_RECVMSG, 0);
 
     if (read_bytes < allocated_bytes) {
       /* TODO(klempner): Consider a second read first, in hopes of getting a
@@ -406,7 +406,7 @@ static void grpc_tcp_handle_read(void *arg /* grpc_tcp */, int success) {
       ++iov_size;
     }
   }
-  GRPC_TIMER_MARK(HANDLE_READ_END, 0);
+  GRPC_TIMER_END(GRPC_PTAG_HANDLE_READ, 0);
 }
 
 static void grpc_tcp_notify_on_read(grpc_endpoint *ep, grpc_endpoint_read_cb cb,
@@ -438,12 +438,12 @@ static grpc_endpoint_write_status grpc_tcp_flush(grpc_tcp *tcp) {
     msg.msg_controllen = 0;
     msg.msg_flags = 0;
 
-    GRPC_TIMER_MARK(SENDMSG_BEGIN, 0);
+    GRPC_TIMER_BEGIN(GRPC_PTAG_SENDMSG, 0);
     do {
       /* TODO(klempner): Cork if this is a partial write */
       sent_length = sendmsg(tcp->fd, &msg, 0);
     } while (sent_length < 0 && errno == EINTR);
-    GRPC_TIMER_MARK(SENDMSG_END, 0);
+    GRPC_TIMER_END(GRPC_PTAG_SENDMSG, 0);
 
     if (sent_length < 0) {
       if (errno == EAGAIN) {
@@ -479,7 +479,7 @@ static void grpc_tcp_handle_write(void *arg /* grpc_tcp */, int success) {
     return;
   }
 
-  GRPC_TIMER_MARK(CB_WRITE_BEGIN, 0);
+  GRPC_TIMER_BEGIN(GRPC_PTAG_TCP_CB_WRITE, 0);
   write_status = grpc_tcp_flush(tcp);
   if (write_status == GRPC_ENDPOINT_WRITE_PENDING) {
     grpc_fd_notify_on_write(tcp->em_fd, &tcp->write_closure);
@@ -495,7 +495,7 @@ static void grpc_tcp_handle_write(void *arg /* grpc_tcp */, int success) {
     cb(tcp->write_user_data, cb_status);
     grpc_tcp_unref(tcp);
   }
-  GRPC_TIMER_MARK(CB_WRITE_END, 0);
+  GRPC_TIMER_END(GRPC_PTAG_TCP_CB_WRITE, 0);
 }
 
 static grpc_endpoint_write_status grpc_tcp_write(grpc_endpoint *ep,
@@ -518,7 +518,7 @@ static grpc_endpoint_write_status grpc_tcp_write(grpc_endpoint *ep,
     }
   }
 
-  GRPC_TIMER_MARK(WRITE_BEGIN, 0);
+  GRPC_TIMER_BEGIN(GRPC_PTAG_TCP_WRITE, 0);
   GPR_ASSERT(tcp->write_cb == NULL);
   slice_state_init(&tcp->write_state, slices, nslices, nslices);
 
@@ -532,7 +532,7 @@ static grpc_endpoint_write_status grpc_tcp_write(grpc_endpoint *ep,
     grpc_fd_notify_on_write(tcp->em_fd, &tcp->write_closure);
   }
 
-  GRPC_TIMER_MARK(WRITE_END, 0);
+  GRPC_TIMER_END(GRPC_PTAG_TCP_WRITE, 0);
   return status;
 }
 
diff --git a/src/core/profiling/timers.c b/src/core/profiling/basic_timers.c
similarity index 76%
rename from src/core/profiling/timers.c
rename to src/core/profiling/basic_timers.c
index bd1700ffd8cc7a9cf4d604be6bc4781a532d94de..d89bba7b8788283cca4c3b1fc6f05eabbbfe1253 100644
--- a/src/core/profiling/timers.c
+++ b/src/core/profiling/basic_timers.c
@@ -31,7 +31,9 @@
  *
  */
 
-#ifdef GRPC_LATENCY_PROFILER
+#include <grpc/support/port_platform.h>
+
+#ifdef GRPC_BASIC_PROFILER
 
 #include "src/core/profiling/timers.h"
 #include "src/core/profiling/timers_preciseclock.h"
@@ -46,7 +48,7 @@
 typedef struct grpc_timer_entry {
   grpc_precise_clock tm;
   gpr_thd_id thd;
-  const char* tag;
+  int tag;
   void* id;
   const char* file;
   int line;
@@ -63,7 +65,7 @@ struct grpc_timers_log {
 
 grpc_timers_log* grpc_timers_log_global = NULL;
 
-grpc_timers_log* grpc_timers_log_create(int capacity_limit, FILE* dump) {
+static grpc_timers_log* grpc_timers_log_create(int capacity_limit, FILE* dump) {
   grpc_timers_log* log = gpr_malloc(sizeof(*log));
 
   /* TODO (vpai): Allow allocation below limit */
@@ -87,15 +89,15 @@ static void log_report_locked(grpc_timers_log* log) {
     grpc_timer_entry* entry = &(log->log[i]);
     fprintf(fp, "GRPC_LAT_PROF ");
     grpc_precise_clock_print(&entry->tm, fp);
-    fprintf(fp, " %p %s %p %s %d\n", (void*)(gpr_intptr)entry->thd, entry->tag, entry->id, entry->file,
-            entry->line);
+    fprintf(fp, " %p %d %p %s %d\n", (void*)(gpr_intptr)entry->thd, entry->tag,
+            entry->id, entry->file, entry->line);
   }
 
   /* Now clear out the log */
   log->num_entries = 0;
 }
 
-void grpc_timers_log_destroy(grpc_timers_log* log) {
+static void grpc_timers_log_destroy(grpc_timers_log* log) {
   gpr_mu_lock(&log->mu);
   log_report_locked(log);
   gpr_mu_unlock(&log->mu);
@@ -106,8 +108,8 @@ void grpc_timers_log_destroy(grpc_timers_log* log) {
   gpr_free(log);
 }
 
-void grpc_timers_log_add(grpc_timers_log* log, const char* tag, void* id,
-                         const char* file, int line) {
+static void grpc_timers_log_add(grpc_timers_log* log, int tag, void* id,
+                                const char* file, int line) {
   grpc_timer_entry* entry;
 
   /* TODO (vpai) : Improve concurrency */
@@ -128,14 +130,25 @@ void grpc_timers_log_add(grpc_timers_log* log, const char* tag, void* id,
   gpr_mu_unlock(&log->mu);
 }
 
-void grpc_timers_log_global_init(void) {
+/* Latency profiler API implementation. */
+void grpc_timer_add_mark(int tag, void* id, const char* file, int line) {
+  grpc_timers_log_add(grpc_timers_log_global, tag, id, file, line);
+}
+
+void grpc_timer_begin(int tag, void* id, const char *file, int line) {}
+void grpc_timer_end(int tag, void* id, const char *file, int line) {}
+
+/* Basic profiler specific API functions. */
+void grpc_timers_global_init(void) {
   grpc_timers_log_global = grpc_timers_log_create(100000, stdout);
 }
 
-void grpc_timers_log_global_destroy(void) {
+void grpc_timers_global_destroy(void) {
   grpc_timers_log_destroy(grpc_timers_log_global);
 }
-#else  /* !GRPC_LATENCY_PROFILER */
-void grpc_timers_log_global_init(void) {}
-void grpc_timers_log_global_destroy(void) {}
-#endif /* GRPC_LATENCY_PROFILER */
+
+
+#else  /* !GRPC_BASIC_PROFILER */
+void grpc_timers_global_init(void) {}
+void grpc_timers_global_destroy(void) {}
+#endif /* GRPC_BASIC_PROFILER */
diff --git a/src/core/profiling/stap_probes.d b/src/core/profiling/stap_probes.d
new file mode 100644
index 0000000000000000000000000000000000000000..374eeedd6c06abea95d5e9e29287c1d56592723b
--- /dev/null
+++ b/src/core/profiling/stap_probes.d
@@ -0,0 +1,6 @@
+provider _stap {
+	probe add_mark(int tag);
+	probe timing_ns_begin(int tag);
+	probe timing_ns_end(int tag);
+};
+
diff --git a/src/core/profiling/stap_timers.c b/src/core/profiling/stap_timers.c
new file mode 100644
index 0000000000000000000000000000000000000000..52fb58a27978e5b4e660be606405dfae1841fab0
--- /dev/null
+++ b/src/core/profiling/stap_timers.c
@@ -0,0 +1,57 @@
+/*
+ *
+ * Copyright 2015, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#ifdef GRPC_STAP_PROFILER
+
+#include "src/core/profiling/timers.h"
+
+#include <sys/sdt.h>
+/* Generated from src/core/profiling/stap_probes.d */
+#include "src/core/profiling/stap_probes.h"
+
+/* Latency profiler API implementation. */
+void grpc_timer_add_mark(int tag, void* id, const char *file, int line) {
+  _STAP_ADD_MARK(tag);
+}
+
+void grpc_timer_begin(int tag, void* id, const char *file, int line) {
+  _STAP_TIMING_NS_BEGIN(tag);
+}
+
+void grpc_timer_end(int tag, void* id, const char *file, int line) {
+  _STAP_TIMING_NS_END(tag);
+}
+
+#endif /* GRPC_STAP_PROFILER */
diff --git a/src/core/profiling/timers.h b/src/core/profiling/timers.h
index 1a6b9ffac90f72771378076d956e0115037b6a7c..d0b8286c03eceb77f7a546b3c6d355193d2d1443 100644
--- a/src/core/profiling/timers.h
+++ b/src/core/profiling/timers.h
@@ -34,35 +34,87 @@
 #ifndef GRPC_CORE_PROFILING_TIMERS_H
 #define GRPC_CORE_PROFILING_TIMERS_H
 
-#include <stdio.h>
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#ifdef GRPC_LATENCY_PROFILER
+void grpc_timers_global_init(void);
+void grpc_timers_global_destroy(void);
 
-typedef struct grpc_timers_log grpc_timers_log;
+void grpc_timer_add_mark(int tag, void* id, const char *file, int line);
+void grpc_timer_begin(int tag, void* id, const char *file, int line);
+void grpc_timer_end(int tag, void* id, const char *file, int line);
 
-grpc_timers_log* grpc_timers_log_create(int capacity_limit, FILE* dump);
-void grpc_timers_log_add(grpc_timers_log*, const char* tag, void* id,
-                         const char* file, int line);
-void grpc_timers_log_destroy(grpc_timers_log *);
+enum grpc_profiling_tags {
+  /* Any GRPC_PTAG_* >= than the threshold won't generate any profiling mark. */
+  GRPC_PTAG_IGNORE_THRESHOLD = 1000000,
 
-extern grpc_timers_log *grpc_timers_log_global;
+  /* Re. Protos. */
+  GRPC_PTAG_PROTO_SERIALIZE = 100 + GRPC_PTAG_IGNORE_THRESHOLD,
+  GRPC_PTAG_PROTO_DESERIALIZE = 101 + GRPC_PTAG_IGNORE_THRESHOLD,
+
+  /* Re. sockets. */
+  GRPC_PTAG_HANDLE_READ = 200 + GRPC_PTAG_IGNORE_THRESHOLD,
+  GRPC_PTAG_SENDMSG = 201 + GRPC_PTAG_IGNORE_THRESHOLD,
+  GRPC_PTAG_RECVMSG = 202 + GRPC_PTAG_IGNORE_THRESHOLD,
+  GRPC_PTAG_POLL_FINISHED = 203 + GRPC_PTAG_IGNORE_THRESHOLD,
+  GRPC_PTAG_TCP_CB_WRITE = 204 + GRPC_PTAG_IGNORE_THRESHOLD,
+  GRPC_PTAG_TCP_WRITE = 205 + GRPC_PTAG_IGNORE_THRESHOLD,
+
+  /* C++ */
+  GRPC_PTAG_CPP_CALL_CREATED = 300 + GRPC_PTAG_IGNORE_THRESHOLD,
+  GRPC_PTAG_CPP_PERFORM_OPS = 301 + GRPC_PTAG_IGNORE_THRESHOLD,
 
-#define GRPC_TIMER_MARK(x, s) \
-  grpc_timers_log_add(grpc_timers_log_global, #x, ((void *)(gpr_intptr)(s)), \
-                      __FILE__, __LINE__)
+  /* > 1024 Unassigned reserved. For any miscellaneous use.
+  * Use addition to generate tags from this base or take advantage of the 10
+  * zero'd bits for OR-ing. */
+  GRPC_PTAG_OTHER_BASE = 1024
+};
 
-#else /* !GRPC_LATENCY_PROFILER */
-#define GRPC_TIMER_MARK(x, s) \
-  do {                        \
-  } while (0)
-#endif /* GRPC_LATENCY_PROFILER */
+#if !(defined(GRPC_STAP_PROFILER) + defined(GRPC_BASIC_PROFILER))
+/* No profiling. No-op all the things. */
+#define GRPC_TIMER_MARK(tag, id) \
+  do {} while(0)
+
+#define GRPC_TIMER_BEGIN(tag, id) \
+  do {} while(0)
+
+#define GRPC_TIMER_END(tag, id) \
+  do {} while(0)
+
+#else /* at least one profiler requested... */
+/* ... hopefully only one. */
+#if defined(GRPC_STAP_PROFILER) && defined(GRPC_BASIC_PROFILER)
+#error "GRPC_STAP_PROFILER and GRPC_BASIC_PROFILER are mutually exclusive."
+#endif
+
+/* Generic profiling interface. */
+#define GRPC_TIMER_MARK(tag, id)                                              \
+  if (tag < GRPC_PTAG_IGNORE_THRESHOLD) {                                     \
+    grpc_timer_add_mark(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__); \
+  }
+
+#define GRPC_TIMER_BEGIN(tag, id)                                             \
+  if (tag < GRPC_PTAG_IGNORE_THRESHOLD) {                                     \
+    grpc_timer_begin(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__);    \
+  }
+
+#define GRPC_TIMER_END(tag, id)                                               \
+  if (tag < GRPC_PTAG_IGNORE_THRESHOLD) {                                     \
+    grpc_timer_end(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__);      \
+  }
+
+#ifdef GRPC_STAP_PROFILER
+/* Empty placeholder for now. */
+#endif /* GRPC_STAP_PROFILER */
+
+#ifdef GRPC_BASIC_PROFILER
+typedef struct grpc_timers_log grpc_timers_log;
+
+extern grpc_timers_log *grpc_timers_log_global;
+#endif /* GRPC_BASIC_PROFILER */
 
-void grpc_timers_log_global_init(void);
-void grpc_timers_log_global_destroy(void);
+#endif /* at least one profiler requested. */
 
 #ifdef __cplusplus
 }
diff --git a/src/core/surface/init.c b/src/core/surface/init.c
index 5a119a47cc79e1a93cc2eff3da2d458f4b558766..bfee28e5fc1bc5d2b7d0f0d106d2f1ee98a13384 100644
--- a/src/core/surface/init.c
+++ b/src/core/surface/init.c
@@ -64,7 +64,7 @@ void grpc_init(void) {
     grpc_iomgr_init();
     grpc_tracer_init("GRPC_TRACE");
     census_init();
-    grpc_timers_log_global_init();
+    grpc_timers_global_init();
   }
   gpr_mu_unlock(&g_init_mu);
 }
@@ -74,7 +74,7 @@ void grpc_shutdown(void) {
   if (--g_initializations == 0) {
     grpc_iomgr_shutdown();
     census_shutdown();
-    grpc_timers_log_global_destroy();
+    grpc_timers_global_destroy();
   }
   gpr_mu_unlock(&g_init_mu);
 }
diff --git a/src/cpp/client/channel.cc b/src/cpp/client/channel.cc
index c541ddfb487ac43f0cb60d95167e2f732326aa7a..475a20d883236837b260144a716c7f4655150a07 100644
--- a/src/cpp/client/channel.cc
+++ b/src/cpp/client/channel.cc
@@ -70,7 +70,7 @@ Call Channel::CreateCall(const RpcMethod& method, ClientContext* context,
                                          ? target_.c_str()
                                          : context->authority().c_str(),
                                      context->raw_deadline());
-  GRPC_TIMER_MARK(CALL_CREATED, c_call);
+  GRPC_TIMER_MARK(GRPC_PTAG_CPP_CALL_CREATED, c_call);
   context->set_call(c_call, shared_from_this());
   return Call(c_call, this, cq);
 }
@@ -79,11 +79,11 @@ void Channel::PerformOpsOnCall(CallOpBuffer* buf, Call* call) {
   static const size_t MAX_OPS = 8;
   size_t nops = MAX_OPS;
   grpc_op ops[MAX_OPS];
-  GRPC_TIMER_MARK(PERFORM_OPS_BEGIN, call->call());
+  GRPC_TIMER_BEGIN(GRPC_PTAG_CPP_PERFORM_OPS, call->call());
   buf->FillOps(ops, &nops);
   GPR_ASSERT(GRPC_CALL_OK ==
              grpc_call_start_batch(call->call(), ops, nops, buf));
-  GRPC_TIMER_MARK(PERFORM_OPS_END, call->call());
+  GRPC_TIMER_END(GRPC_PTAG_CPP_PERFORM_OPS, call->call());
 }
 
 void* Channel::RegisterMethod(const char* method) {
diff --git a/src/cpp/common/call.cc b/src/cpp/common/call.cc
index 9878133331d7f7e2ba1d5018be667802fb09c669..a60282316ae945d3be8f9a4e9a205c8970087494 100644
--- a/src/cpp/common/call.cc
+++ b/src/cpp/common/call.cc
@@ -232,13 +232,13 @@ void CallOpBuffer::FillOps(grpc_op* ops, size_t* nops) {
   }
   if (send_message_ || send_message_buffer_) {
     if (send_message_) {
-      GRPC_TIMER_MARK(SER_PROTO_BEGIN, 0);
+      GRPC_TIMER_BEGIN(GRPC_PTAG_PROTO_SERIALIZE, 0);
       bool success = SerializeProto(*send_message_, &send_buf_);
       if (!success) {
         abort();
         // TODO handle parse failure
       }
-      GRPC_TIMER_MARK(SER_PROTO_END, 0);
+      GRPC_TIMER_END(GRPC_PTAG_PROTO_SERIALIZE, 0);
     } else {
       send_buf_ = send_message_buffer_->buffer();
     }
@@ -310,10 +310,10 @@ bool CallOpBuffer::FinalizeResult(void** tag, bool* status) {
     if (recv_buf_) {
       got_message = *status;
       if (recv_message_) {
-        GRPC_TIMER_MARK(DESER_PROTO_BEGIN, 0);
+        GRPC_TIMER_BEGIN(GRPC_PTAG_PROTO_DESERIALIZE, 0);
         *status = *status && DeserializeProto(recv_buf_, recv_message_);
         grpc_byte_buffer_destroy(recv_buf_);
-        GRPC_TIMER_MARK(DESER_PROTO_END, 0);
+        GRPC_TIMER_END(GRPC_PTAG_PROTO_DESERIALIZE, 0);
       } else {
         recv_message_buffer_->set_buffer(recv_buf_);
       }
diff --git a/src/cpp/server/server.cc b/src/cpp/server/server.cc
index 4694a3a7ff53e5e7b7a569f56e09f8699007925f..34aac4c5845c9b9285aa32167b57b3afe80a5da7 100644
--- a/src/cpp/server/server.cc
+++ b/src/cpp/server/server.cc
@@ -124,12 +124,12 @@ class Server::SyncRequest GRPC_FINAL : public CompletionQueueTag {
       std::unique_ptr<grpc::protobuf::Message> req;
       std::unique_ptr<grpc::protobuf::Message> res;
       if (has_request_payload_) {
-        GRPC_TIMER_MARK(DESER_PROTO_BEGIN, call_.call());
+        GRPC_TIMER_BEGIN(GRPC_PTAG_PROTO_DESERIALIZE, call_.call());
         req.reset(method_->AllocateRequestProto());
         if (!DeserializeProto(request_payload_, req.get())) {
           abort();  // for now
         }
-        GRPC_TIMER_MARK(DESER_PROTO_END, call_.call());
+        GRPC_TIMER_END(GRPC_PTAG_PROTO_DESERIALIZE, call_.call());
       }
       if (has_response_payload_) {
         res.reset(method_->AllocateResponseProto());
@@ -346,9 +346,9 @@ class Server::AsyncRequest GRPC_FINAL : public CompletionQueueTag {
     bool orig_status = *status;
     if (*status && request_) {
       if (payload_) {
-        GRPC_TIMER_MARK(DESER_PROTO_BEGIN, call_);
+        GRPC_TIMER_BEGIN(GRPC_PTAG_PROTO_DESERIALIZE, call_);
         *status = DeserializeProto(payload_, request_);
-        GRPC_TIMER_MARK(DESER_PROTO_END, call_);
+        GRPC_TIMER_END(GRPC_PTAG_PROTO_DESERIALIZE, call_);
       } else {
         *status = false;
       }
diff --git a/templates/Makefile.template b/templates/Makefile.template
index 9a12cf99226e6d9a43374b3f1208f995ef1118c7..1e7a2a49bdde0332b94e771e6a8b55972b405a55 100644
--- a/templates/Makefile.template
+++ b/templates/Makefile.template
@@ -101,14 +101,23 @@ CPPFLAGS_opt = -O2
 LDFLAGS_opt =
 DEFINES_opt = NDEBUG
 
-VALID_CONFIG_latprof = 1
-CC_latprof = $(DEFAULT_CC)
-CXX_latprof = $(DEFAULT_CXX)
-LD_latprof = $(DEFAULT_CC)
-LDXX_latprof = $(DEFAULT_CXX)
-CPPFLAGS_latprof = -O2 -DGRPC_LATENCY_PROFILER
-LDFLAGS_latprof =
-DEFINES_latprof = NDEBUG
+VALID_CONFIG_basicprof = 1
+CC_basicprof = $(DEFAULT_CC)
+CXX_basicprof = $(DEFAULT_CXX)
+LD_basicprof = $(DEFAULT_CC)
+LDXX_basicprof = $(DEFAULT_CXX)
+CPPFLAGS_basicprof = -O2 -DGRPC_BASIC_PROFILER
+LDFLAGS_basicprof =
+DEFINES_basicprof = NDEBUG
+
+VALID_CONFIG_stapprof = 1
+CC_stapprof = $(DEFAULT_CC)
+CXX_stapprof = $(DEFAULT_CXX)
+LD_stapprof = $(DEFAULT_CC)
+LDXX_stapprof = $(DEFAULT_CXX)
+CPPFLAGS_stapprof = -O2 -DGRPC_STAP_PROFILER
+LDFLAGS_stapprof =
+DEFINES_stapprof = NDEBUG
 
 VALID_CONFIG_dbg = 1
 CC_dbg = $(DEFAULT_CC)
@@ -197,6 +206,7 @@ DEFINES_gcov = _DEBUG DEBUG
 prefix ?= /usr/local
 
 PROTOC = protoc
+DTRACE = dtrace
 CONFIG ?= opt
 CC = $(CC_$(CONFIG))
 CXX = $(CXX_$(CONFIG))
@@ -364,6 +374,8 @@ PERFTOOLS_CHECK_CMD = $(CC) $(CFLAGS) $(CPPFLAGS) -o $(TMPOUT) test/build/perfto
 PROTOBUF_CHECK_CMD = $(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $(TMPOUT) test/build/protobuf.cc -lprotobuf $(LDFLAGS)
 PROTOC_CHECK_CMD = which protoc > /dev/null
 PROTOC_CHECK_VERSION_CMD = protoc --version | grep -q libprotoc.3
+DTRACE_CHECK_CMD = which dtrace > /dev/null
+SYSTEMTAP_HEADERS_CHECK_CMD = $(CC) $(CFLAGS) $(CPPFLAGS) -o $(TMPOUT) test/build/systemtap.c $(LDFLAGS)
 
 ifeq ($(OPENSSL_REQUIRES_DL),true)
 OPENSSL_ALPN_CHECK_CMD += -ldl
@@ -396,6 +408,18 @@ else
 HAS_VALID_PROTOC = false
 endif
 
+# Check for Systemtap (https://sourceware.org/systemtap/), first by making sure <sys/sdt.h> is present
+# in the system and secondly by checking for the "dtrace" binary (on Linux, this is part of the Systemtap
+# distribution. It's part of the base system on BSD/Solaris machines).
+HAS_SYSTEMTAP_HEADERS = $(shell $(SYSTEMTAP_HEADERS_CHECK_CMD) 2> /dev/null && echo true || echo false)
+HAS_DTRACE = $(shell $(DTRACE_CHECK_CMD) 2> /dev/null && echo true || echo false)
+HAS_SYSTEMTAP = false
+ifeq ($(HAS_SYSTEMTAP_HEADERS),true)
+ifeq ($(HAS_DTRACE),true)
+HAS_SYSTEMTAP = true
+endif
+endif
+
 ifeq ($(wildcard third_party/openssl/ssl/ssl.h),)
 HAS_EMBEDDED_OPENSSL_ALPN = false
 else
@@ -575,6 +599,17 @@ protoc_dep_message:
 	@echo "  make run_dep_checks"
 	@echo
 
+systemtap_dep_error:
+	@echo
+	@echo "DEPENDENCY ERROR"
+	@echo
+	@echo "Under the '$(CONFIG)' configutation, the target you are trying "
+	@echo "to build requires systemtap 2.7+ (on Linux) or dtrace (on other "
+	@echo "platforms such as Solaris and *BSD). "
+	@echo
+	@echo "Please consult INSTALL to get more information."
+	@echo
+
 stop:
 	@false
 
@@ -863,6 +898,18 @@ endif
 
 % endfor
 
+ifeq ($(CONFIG),stapprof)
+src/core/profiling/stap_timers.c: $(GENDIR)/src/core/profiling/stap_probes.h
+ifeq ($(HAS_SYSTEMTAP),true)
+$(GENDIR)/src/core/profiling/stap_probes.h: src/core/profiling/stap_probes.d
+	$(E) "[DTRACE]  Compiling $<"
+	$(Q) mkdir -p `dirname $@`
+	$(Q) $(DTRACE) -C -h -s $< -o $@
+else
+$(GENDIR)/src/core/profiling/stap_probes.h: systemtap_dep_error stop
+endif
+endif
+
 $(OBJDIR)/$(CONFIG)/%.o : %.c
 	$(E) "[C]       Compiling $<"
 	$(Q) mkdir -p `dirname $@`
@@ -883,7 +930,6 @@ $(OBJDIR)/$(CONFIG)/%.o : %.cc
 	$(Q) mkdir -p `dirname $@`
 	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -MMD -MF $(addsuffix .dep, $(basename $@)) -c -o $@ $<
 
-
 install: install_c install_cxx install-plugins install-certs verify-install
 
 install_c: install-headers_c install-static_c install-shared_c
diff --git a/test/build/systemtap.c b/test/build/systemtap.c
new file mode 100644
index 0000000000000000000000000000000000000000..66ff38ebd6d70c65fdacf125dcad1a934c3eb0ca
--- /dev/null
+++ b/test/build/systemtap.c
@@ -0,0 +1,42 @@
+/*
+ *
+ * Copyright 2015, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <sys/sdt.h>
+
+#ifndef _SYS_SDT_H
+#error "_SYS_SDT_H not defined, despite <sys/sdt.h> being present."
+#endif
+
+int main() {
+  return 0;
+}
diff --git a/test/core/profiling/mark_timings.stp b/test/core/profiling/mark_timings.stp
new file mode 100644
index 0000000000000000000000000000000000000000..0c0a417faf74370c2b23b0a4bab7695b32e532b1
--- /dev/null
+++ b/test/core/profiling/mark_timings.stp
@@ -0,0 +1,40 @@
+/* This script requires a command line argument, to be used in the "process"
+ * probe definition.
+ *
+ * For a statically build binary, that'd be the name of the binary itself.
+ * For dinamically built ones, point to the location of the libgprc.so being
+ * used. */
+
+global starts, times, times_per_tag
+
+probe process(@1).mark("timing_ns_begin") {
+  starts[$arg1, tid()] = gettimeofday_ns();
+}
+
+probe process(@1).mark("timing_ns_end") {
+  tag = $arg1
+  t = gettimeofday_ns();
+  if (s = starts[tag, tid()]) {
+     times[tag, tid()] <<< t-s;
+     delete starts[tag, tid()];
+  }
+}
+
+probe end {
+  printf("%15s %9s %10s %10s %10s %10s\n", "tag", "tid", "count",
+    "min(ns)", "avg(ns)", "max(ns)");
+  foreach ([tag+, tid] in times) {
+    printf("%15X %9d %10d %10d %10d %10d\n", tag, tid, @count(times[tag, tid]),
+          @min(times[tag, tid]), @avg(times[tag, tid]), @max(times[tag, tid]));
+  }
+
+  printf("Per tag average of averages\n");
+  foreach ([tag+, tid] in times) {
+    times_per_tag[tag] <<< @avg(times[tag, tid]);
+  }
+  printf("%15s %10s %10s\n", "tag", "count", "avg(ns)");
+  foreach ([tag+] in times_per_tag) {
+    printf("%15X %10d %10d\n", tag, @count(times_per_tag[tag]),
+                                    @avg(times_per_tag[tag]));
+  }
+}
diff --git a/test/core/profiling/timers_test.c b/test/core/profiling/timers_test.c
index 55e59c969e8c62a68b48e8b32ce7ec4c46bb5df6..12b08c115e5dc7cb74f54ac8740af630e5b2ec2e 100644
--- a/test/core/profiling/timers_test.c
+++ b/test/core/profiling/timers_test.c
@@ -76,8 +76,8 @@ void test_log_events(int num_seqs) {
 
 int main(int argc, char **argv) {
   grpc_test_init(argc, argv);
-  grpc_timers_log_global_init();
+  grpc_timers_global_init();
   test_log_events(1000000);
-  grpc_timers_log_global_destroy();
+  grpc_timers_global_destroy();
   return 0;
 }
diff --git a/vsprojects/grpc/grpc.vcxproj b/vsprojects/grpc/grpc.vcxproj
index 38920bb15ec24a7e0a60b3ff67e35be26f627c81..0007f494227e525dfe3460abf9f720d69dd36b5b 100644
--- a/vsprojects/grpc/grpc.vcxproj
+++ b/vsprojects/grpc/grpc.vcxproj
@@ -399,7 +399,9 @@
     </ClCompile>
     <ClCompile Include="..\..\src\core\json\json_writer.c">
     </ClCompile>
-    <ClCompile Include="..\..\src\core\profiling\timers.c">
+    <ClCompile Include="..\..\src\core\profiling\basic_timers.c">
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\profiling\stap_timers.c">
     </ClCompile>
     <ClCompile Include="..\..\src\core\statistics\census_init.c">
     </ClCompile>
diff --git a/vsprojects/grpc/grpc.vcxproj.filters b/vsprojects/grpc/grpc.vcxproj.filters
index edce8d965262859f34b8f26c85b4a27e6492d3cc..bd467e06c82f71a669e990445e416a4d65b4de7f 100644
--- a/vsprojects/grpc/grpc.vcxproj.filters
+++ b/vsprojects/grpc/grpc.vcxproj.filters
@@ -211,7 +211,10 @@
     <ClCompile Include="..\..\src\core\json\json_writer.c">
       <Filter>src\core\json</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\src\core\profiling\timers.c">
+    <ClCompile Include="..\..\src\core\profiling\basic_timers.c">
+      <Filter>src\core\profiling</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\profiling\stap_timers.c">
       <Filter>src\core\profiling</Filter>
     </ClCompile>
     <ClCompile Include="..\..\src\core\statistics\census_init.c">
diff --git a/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj b/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj
index d1291051076c07e9d1b42e908e3a4dfe5bff0b5c..c1572259abb0af8ed36d46d11f3ed6270f757dba 100644
--- a/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj
+++ b/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj
@@ -344,7 +344,9 @@
     </ClCompile>
     <ClCompile Include="..\..\src\core\json\json_writer.c">
     </ClCompile>
-    <ClCompile Include="..\..\src\core\profiling\timers.c">
+    <ClCompile Include="..\..\src\core\profiling\basic_timers.c">
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\profiling\stap_timers.c">
     </ClCompile>
     <ClCompile Include="..\..\src\core\statistics\census_init.c">
     </ClCompile>
diff --git a/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj.filters b/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj.filters
index daca2c0c5a038b129581f31a4c611595e02bfc3e..0bee095da3d729aba37e78919d72e648a8fba2ff 100644
--- a/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj.filters
+++ b/vsprojects/grpc_unsecure/grpc_unsecure.vcxproj.filters
@@ -154,7 +154,10 @@
     <ClCompile Include="..\..\src\core\json\json_writer.c">
       <Filter>src\core\json</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\src\core\profiling\timers.c">
+    <ClCompile Include="..\..\src\core\profiling\basic_timers.c">
+      <Filter>src\core\profiling</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\profiling\stap_timers.c">
       <Filter>src\core\profiling</Filter>
     </ClCompile>
     <ClCompile Include="..\..\src\core\statistics\census_init.c">