/* prof_utils_libpfm3.c */

/*
 * This is a port of prof_utils.c to libpfm3.  The port was contributed
 * by Peter Chubb.
 *
 * This version should only be used with libpfm3, which is only available
 * on Itanium machines running test versions of Linux 2.6. (On non-Itanium
 * machines it should behave exactly like the original.)
 * It is expected to eventually become the default.
 */
/*
 * Copyright (c) 2003 by Hewlett-Packard Company.  All rights reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE. 
 */

#include "prof_utils.h"
#include "q-lib.h"

#include <errno.h>
#include <link.h>
#include <sys/time.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <execinfo.h>
#include <link.h>

#define AO_REQUIRE_CAS        /* Really we need fetch_and_add.    */
                        /* Compare-and-swap gives us that.  */
#include "atomic_ops.h"
#ifdef HAVE_LIBUNWIND_H
#include <libunwind.h>
#else
#define NO_UNWIND
#endif

static long interval = 10000; /* Profiling interval.  Usecs or count.      */
static int real_time = 1;     /* using real or virtual time? */

/*
 * Default fatal-error hook (initial value of QPROF_error).
 *
 * Writes msg followed by a newline to file descriptor 2 and then calls
 * abort().  If the environment variable QPROF_LOOP_ON_ERROR is set, it
 * spins forever instead of aborting (presumably so that a debugger can be
 * attached to the failing process).  Never returns.
 *
 * The environment test must be "!= NULL": with "== NULL" the process
 * would hang on every error unless the variable was set.
 */
static void QPROF_default_error(const char *msg)
{
  write (2, msg, strlen (msg));
  write (2, "\n", 1);
  if (getenv("QPROF_LOOP_ON_ERROR") != NULL) {
    for (;;);
  }
  abort ();
}

/*
 * Default warning hook (initial value of QPROF_warn): write msg and a
 * trailing newline to file descriptor 2, then return to the caller.
 */
static void QPROF_default_warn(const char *msg)
{
  static const char newline[] = "\n";
  const size_t msg_len = strlen (msg);

  write (2, msg, msg_len);
  write (2, newline, sizeof (newline) - 1);
}
  
  
/* Hook called to report a fatal error.  It starts out pointing at      */
/* QPROF_default_error; being a plain non-const function pointer, it    */
/* can be reassigned.                                                   */
void (*QPROF_error)(const char *msg) = QPROF_default_error;

/* Hook called to report a non-fatal condition.  It starts out pointing */
/* at QPROF_default_warn.                                               */
void (*QPROF_warn)(const char *msg) = QPROF_default_warn;

/* What generates the profiling signal: an interval timer (TIMER) or a  */
/* hardware performance counter overflow (HW_EVENT).                    */
typedef enum
{ TIMER, HW_EVENT, OTHER }
signal_sources;
      /* add others as necessary ... */


/* Set to 1 by the first QPROF_setup_signals call that installs the     */
/* signal handler; later calls that observe it set skip sigaction.      */
static volatile AO_T sigaction_called = 0;

#if defined(__ia64__) && defined(HW_EVENT_SUPPORT)

/*
 * Support for profiling based on Itanium hardware performance counters.
 * This uses Stephane Eranian's perfmon kernel support and libpfm
 * library.  We try to revert to timer-based sampling if any pieces
 * are missing.
 *
 * The intent here is that we could provide alternate implementations
 * of these routines for other hardware and performance counter libraries.
 * For higher level libraries this is likely to be either easier or
 * impossible.
 *
 * This borrows heavily from Stephane's sample code.
 */

# include <errno.h>
# include <perfmon/pfmlib.h>
# include <perfmon/perfmon.h>
# include <dlfcn.h>
# include <fcntl.h>
# include <asm/unistd.h>

/* Sizes of the PMC/PMD argument arrays built in QPROF_setup_hw_event.  */
#define NUM_PMCS PFMLIB_MAX_PMCS
#define NUM_PMDS PFMLIB_MAX_PMDS
/* Signal requested with F_SETSIG for the perfmon context descriptor.   */
#define HW_SIGNO SIGPROF
/* Suppresses the no-op macro versions of the hw-event routines that    */
/* are defined further down when this symbol is absent.                 */
#define HAVE_HW_EVENTS

/* Descriptor of the perfmon context; assigned in QPROF_setup_hw_event  */
/* and used by the start/stop/restart routines.                         */
int ctx_fd;


/*
 * Return the id of the calling thread as reported by the gettid system
 * call.  If the system call fails (returns -1), the process id is
 * returned instead.
 */
static __inline int gettid(void)
{
      const pid_t tid = syscall(__NR_gettid);

      return (tid == -1) ? getpid() : tid;
}

/* The following may be written repeatedly by different threads.  */
/* But all writes should write the same value.                    */
/* perfmonctl as resolved from libpfm with dlsym in                     */
/* QPROF_setup_hw_event.                                                */
static typeof(perfmonctl) * my_perfmonctl;
/* pfm_self_start / pfm_self_stop as resolved with dlsym.  NOTE: the    */
/* check that these lookups succeeded is commented out in               */
/* QPROF_setup_hw_event, so either pointer may be NULL.                 */
static void (* my_pfm_start)(int fd);
static void (* my_pfm_stop)(int fd);

/*
 * Returns 1 if we set things up to receive signals in response to
 * PMU events.  We do so only if QPROF_HW_EVENT was set to a proper PMU
 * event.
 */
static int
QPROF_setup_hw_event (long interval)
{
  void *pfm_handle;
  const char *event_string = getenv("QPROF_HW_EVENT");
  pfarg_context_t ctx[1];
  pfmlib_input_param_t inp;
  pfmlib_output_param_t outp;
  pfarg_reg_t pc[NUM_PMCS];
  pfarg_reg_t pd[NUM_PMDS];
  int ret;
  int i;
  pfarg_load_t load_args;
  typeof(pfm_initialize) * my_pfm_initialize;
  typeof(pfm_find_event) * my_pfm_find_event;
  typeof(pfm_dispatch_events) * my_pfm_dispatch_events;

  if (0 == event_string) return 0;
  if ((pfm_handle = dlopen("libpfm.so.3", RTLD_LAZY)) == NULL)
    {
      QPROF_warn("Can't load libpfm: using timer instead.\n");
      return 0;
    }
  my_pfm_initialize = dlsym(pfm_handle, "pfm_initialize");
  my_pfm_find_event = dlsym(pfm_handle, "pfm_find_event");
  my_pfm_dispatch_events = dlsym(pfm_handle, "pfm_dispatch_events");
  my_perfmonctl = dlsym(pfm_handle, "perfmonctl");
  my_pfm_start = dlsym(pfm_handle, "pfm_self_start");
  my_pfm_stop = dlsym(pfm_handle, "pfm_self_stop");

  if (!my_pfm_initialize || !my_pfm_find_event || !my_pfm_dispatch_events
      || !my_perfmonctl /* || !my_pfm_start || !my_pfm_stop */)
    {
      QPROF_warn("Missing libpfm symbol: using timer instead.\n");
      return 0;
    }
  if (my_pfm_initialize() != PFMLIB_SUCCESS)
    {
      QPROF_warn("Can't initialize perfmon library: using timer instead.\n");
      return 0;
    }

  memset(ctx, 0, sizeof(ctx));
  memset(&inp, 0, sizeof inp);
  memset(&outp, 0, sizeof outp);

  /*
   * Prepare parameter structure for pfm_dispatch_events.
   * We don't use any Itanium specific features here.
   * So evt.pfp_model is NULL.
   */
  ret = my_pfm_find_event(/* FIXME */ (char *)event_string, &inp.pfp_events[0].event);
  if (ret != PFMLIB_SUCCESS)
    {
      QPROF_warn("Cannot find event: using timer instead\n");
      return 0;
    }
  /*
   * set the default privilege mode for all counters:
   *  PFM_PLM3 : user level only
   */
  inp.pfp_dfl_plm = PFM_PLM3; 

  /*
   * how many counters we use
   */
  inp.pfp_event_count = 1;

  /*
   * Let the library figure out the values for the PMC registers.
   * This reads inp.pfp_events (and friends) and writes efp.pfp_pc
   * (and friends).
   */
  if ((ret = my_pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS)
    {
      QPROF_warn("Cannot configure events: using timer instead\n");
    }
  /*
   * now create the context for self monitoring/per-task
   */
  if (my_perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 )
    {
      if (errno == ENOSYS)
      QPROF_warn("Kernel lacks performance monitoring support: "
                 "Using timer.\n");
      else
      QPROF_warn("Failed to create perfmon context: "
                 "Using timer.\n");
      return 0;
  }

 /*
  * extract the identifier for our context
  */
  ctx_fd = ctx[0].ctx_fd;


 /*
  *       
  * Now prepare the argument to initialize the PMDs and PMCS.
  * We must pfp_pmc_count to determine the number of PMC to intialize.
  * We must use pfp_event_count to determine the number of PMD to initialize.
  * Some events causes extra PMCs to be used, so  pfp_pmc_count may be > pfp_event_count.
  * This step is new compared to libpfm-2.x. It is necessary because the library no
  * longer knows about the kernel data structures.
  */
  for (i=0; i < outp.pfp_pmc_count; i++) {
        pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
        pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
  }

  /*
   * the PMC controlling the event ALWAYS come first, that's why this loop
   * is safe even when extra PMC are needed to support a particular event.
   */
  for (i=0; i < inp.pfp_event_count; i++) {
       pd[i].reg_num   = pc[i].reg_num;
  }


  /*
   * Arrange to be notified on counter overflow.
   */
  pc[0].reg_flags       |= PFM_REGFL_OVFL_NOTIFY;
  pc[0].reg_reset_pmds[0] |= 1UL << pc[1].reg_num;


  /*
   * Initialize PMDs.
   */
    {
      unsigned i;

      memset(pd, 0, sizeof(pd));
      for (i=0; i < inp.pfp_event_count; i++)
        pd[i].reg_num = pc[i].reg_num;
    }
    /*
     * we arm the first counter, such that it will overflow
     * after interval events have been observed
     */
    pd[0].reg_value       = (~0UL) - interval + 1;
    pd[0].reg_long_reset  = (~0UL) - interval + 1;

  /*
   * Now program the registers.
   */
  if (my_perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1)
    QPROF_error("perfmonctl(PFM_WRITE_PMCS...) failed.\n");

  if (my_perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
    QPROF_error("perfmonctl(PFM_WRITE_PMDS...) failed.\n");
  }

  /*
   * we want to monitor ourself
   */
   load_args.load_pid = gettid();

   if (my_perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
      QPROF_error("perfmonctl error PFM_LOAD_CONTEXT");
   }

   /*
    * setup asynchronous notification on the file descriptor
    */
   ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC);
   if (ret == -1) {
      QPROF_error("cannot set ASYNC on perfmon context fd\n");
   }

   /*
    * get ownership of the descriptor
    */
   ret = fcntl(ctx_fd, F_SETOWN, getpid());
      if (ret == -1) {
          QPROF_error("cannot setown perfmon context fd\n");
   }
   ret = fcntl(ctx_fd, F_SETSIG, (long)HW_SIGNO);
   if (ret == -1) {
      QPROF_error("cannot set signal on perfmon context fd\n");
   }

  return 1;
}

/*
 * Start monitoring.
 */
void
QPROF_start_hw_event()
{
  my_pfm_start(ctx_fd);
}

/*
 * Stop monitoring.
 */
void
QPROF_stop_hw_event()
{
  my_pfm_stop(ctx_fd);

  if (getpid() == gettid())
        close(ctx_fd);
}

/*
 * Restart monitoring.
 * This needs to be done in the signal handler.
 *
 * Signals whose si_fd is not our perfmon context are ignored.  Otherwise
 * one message is read from the context descriptor (its contents are not
 * examined) and the context is restarted with PFM_RESTART; a failing
 * restart is reported through QPROF_error.
 */
void
QPROF_restart_hw_event(struct siginfo *si)
{
  if (si->si_fd == ctx_fd)
    {
      pfm_msg_t overflow_msg;

      read(ctx_fd, &overflow_msg, sizeof overflow_msg);
      if (-1 == my_perfmonctl(ctx_fd, PFM_RESTART,NULL, 0))
        QPROF_error("Perfmonctl(...PFM_RESTART...) failed\n");
    }
}


#endif

/* Without hardware event support the hw-event entry points become      */
/* macros: setup always reports failure (0) and the others expand to    */
/* nothing, so callers need no conditional compilation.                 */
#ifndef HAVE_HW_EVENTS

# define QPROF_setup_hw_event(interval) 0
# define QPROF_start_hw_event()
# define QPROF_stop_hw_event()
# define QPROF_restart_hw_event(si)
# define HW_SIGNO -1    /* Shouldn't really be used. Must be defined.   */

#endif /* HAVE_HW_EVENTS */

/* Where profiling signals come from.  Switched to HW_EVENT by          */
/* QPROF_setup_signals when QPROF_setup_hw_event succeeds.              */
static signal_sources source = TIMER;

/*
 * Install `handler' for the profiling signal and start the source of
 * that signal.
 *
 * Environment variables consulted:
 *   QPROF_INTERVAL  sampling interval (usecs or event count), >= 1;
 *   QPROF_REAL      use ITIMER_REAL / SIGALRM;
 *   QPROF_VIRTUAL   use ITIMER_VIRTUAL / SIGVTALRM;
 *   (and, through QPROF_setup_hw_event, QPROF_HW_EVENT).
 * The default is ITIMER_PROF / SIGPROF.  If hardware events could be set
 * up, HW_SIGNO is used and no interval timer is armed.
 *
 * Returns the number of the signal that will be delivered.  Errors are
 * reported through QPROF_error.
 */
int
QPROF_setup_signals (void (*handler) (int, siginfo_t *, void *))
{
  int timer = ITIMER_PROF;
  int QPROF_signo = SIGPROF;
  const char *interval_string = getenv ("QPROF_INTERVAL");

  if (NULL != interval_string)
    {
      /* strtol, unlike atoi, has defined behavior for values that do   */
      /* not fit in an int.                                             */
      long tmp = strtol (interval_string, NULL, 10);

      if (tmp >= 1)
        interval = tmp;
      else
        QPROF_error("QPROF_INTERVAL must be >= 1");
    }
  if (QPROF_setup_hw_event(interval))
    source = HW_EVENT;
  real_time = 1;
  if (0 != getenv ("QPROF_REAL"))
    {
      timer = ITIMER_REAL;
      QPROF_signo = SIGALRM;
    }
  if (0 != getenv ("QPROF_VIRTUAL"))
    {
      timer = ITIMER_VIRTUAL;
      QPROF_signo = SIGVTALRM;
      real_time = 0;
    }
  if (source == HW_EVENT)
    QPROF_signo = HW_SIGNO;
  if (!AO_load(&sigaction_called))  /* Probably first thread */
    {
      struct sigaction act;

      AO_store_release(&sigaction_called, 1);  /* Visible after AO_load */
      /* Set up signal handler. OK to do in multiple threads. */
      /* Clear the whole structure so that no member is passed to the   */
      /* kernel uninitialized.                                          */
      memset (&act, 0, sizeof act);
#     if defined(__i386__) || defined(__alpha__)
        act.sa_flags = SA_RESTART;
#     else
        act.sa_flags = SA_RESTART | SA_SIGINFO;
#     endif
      act.sa_sigaction = handler;
      sigemptyset(&act.sa_mask);
      if (0 != sigaction (QPROF_signo, &act, 0))
        QPROF_error("Sigaction for QPROF_setup_signals failed");
    }

  if (TIMER == source)
    {
      struct itimerval interval_spec;

      /* fprintf (stderr, "Setting timer in %d\n", getpid ()); */
      /* Only tv_usec is used below, so keep the interval under 1 sec.  */
      if (interval >= 1000000)
        interval = 999000;
      interval_spec.it_value.tv_usec =
        interval_spec.it_interval.tv_usec = interval;
      interval_spec.it_value.tv_sec = interval_spec.it_interval.tv_sec = 0;
      if (0 != setitimer (timer, &interval_spec, 0))
        QPROF_error("Setitimer for QPROF_setup_signals failed");
    }
  if (HW_EVENT == source)
    QPROF_start_hw_event();
  return QPROF_signo;
}

#include <stdio.h>

/*
 * Return the absolute path of the running executable, as given by the
 * /proc/self/exe symbolic link, or 0 if it cannot be determined (the
 * link cannot be read, does not start with '/', or does not fit in the
 * buffer).
 *
 * The answer, including a failure, is computed once and cached; the
 * returned string lives in static storage and must not be freed.
 */
char *
QPROF_get_exe_name (void)
{
  /* Room for any path the system allows; a 100 byte buffer made every  */
  /* executable with a longer path look nameless.                       */
# ifdef PATH_MAX
#   define EXE_SZ PATH_MAX
# else
#   define EXE_SZ 4096
# endif
  static char exe_name[EXE_SZ];
  static int found_exe_name = 0;
  static char *result;
  long ret_code;

  if (found_exe_name)
    return result;
  /* FIXME - This is probably not 100% thread-safe. */
  /* readlink does not null-terminate; leave room for the terminator.   */
  ret_code = readlink ("/proc/self/exe", exe_name, EXE_SZ - 1);
  if (ret_code <= 0 || ret_code >= EXE_SZ - 1 || exe_name[0] != '/')
    {
      /* Error, or a name that filled the buffer and may be truncated.  */
      result = 0;
    }
  else
    {
      exe_name[ret_code] = '\0';
      result = exe_name;
    }
  /* Publish the flag only after result is valid.  Dont try again.      */
  found_exe_name = 1;
  return result;
}

/*
 * Write a pc value in human-readable form into result_buf, of length ncols+1,
 * padding the total output to ncols.
 *
 * The preferred form comes from running "addr2line -C -f -e <exe> <pc>"
 * (program path overridable with QPROF_ADDR2LINE) with LD_PRELOAD
 * temporarily removed from the environment.  If that is not possible the
 * text comes from backtrace_symbols, and as a last resort it is just
 * "[<address>]".  A call made while another call is in progress also
 * produces only "[<address>]".
 */
void
QPROF_format_pc (char *result_buf, unsigned long pc, size_t ncols)
{
  static unsigned long reentry_count = 0;
  FILE *pipe;
  unsigned long old_count = AO_fetch_and_add1_acquire(&reentry_count);

  if (old_count > 0)
    {
      /* Avoid excessive recursion; punt      */
      snprintf (result_buf, ncols + 1, "[%p]", (void *) pc);
      goto done;
    }
  /* Try for a line number with the addr2line program. */
  {
#   define CMD_SZ 200
    char cmd_buf[CMD_SZ];
    size_t result_len = 0;
    size_t addr_cols;
    int cmd_len;
    char *exe_name;
    char *old_preload;
#   define PRELOAD_SZ 200
    char preload_buf[PRELOAD_SZ];
    static int will_always_fail = 0;
    char * addr2line_path;
#   define TMP_SZ 1024
    char tmp_buf[TMP_SZ];

    /* First we get the name of the executable:             */
    if (will_always_fail)
      goto fail;
    exe_name = QPROF_get_exe_name ();
    if (exe_name == 0 || strcmp(exe_name,"/bin/bash") == 0)
      {
        /* We don't try this inside bash, since it seems to handle      */
        /* environments explicitly, and hence our updates don't work.   */
        will_always_fail = 1;   /* Dont try again. */
        goto fail;
      }
    /* Then we use popen to start addr2line -e <exe> <addr> */
    /* There are faster ways to do this, but hopefully this */
    /* isn't time critical.                                 */
    /* In the best case this gets us a string of the form   */
    /* <fn name>\n<full path name>:<line no.>.              */
    /* NOTE(review): the names are pasted into a shell command  */
    /* unquoted; paths with shell metacharacters will misbehave. */
    addr2line_path = getenv("QPROF_ADDR2LINE");
    if (0 == addr2line_path)
      addr2line_path = "/usr/bin/addr2line";
    cmd_len = snprintf (cmd_buf, CMD_SZ,
                        "%s -C -f -e %s 0x%lx", addr2line_path, exe_name, pc);
    if (cmd_len < 0 || cmd_len >= CMD_SZ)
      {
        /* Never run a truncated command line. */
        goto fail;
      }
    old_preload = getenv ("LD_PRELOAD");
    if (0 != old_preload)
      {
        if (strlen (old_preload) >= PRELOAD_SZ)
          {
            will_always_fail = 1;
            goto fail;
          }
        strcpy (preload_buf, old_preload);
        unsetenv ("LD_PRELOAD");
      }
    pipe = popen (cmd_buf, "r");
    if (0 != old_preload && 0 != setenv ("LD_PRELOAD", preload_buf, 0))
      {
        abort ();
      }
    if (pipe != NULL)
      {
        /* Keep one byte free for the terminating null. */
        result_len = fread (tmp_buf, 1, TMP_SZ - 1, pipe);
        pclose(pipe);
      }
    if (pipe == NULL || result_len == 0)
      {
        will_always_fail = 1;
        goto fail;
      }
    if (tmp_buf[result_len - 1] == '\n')
      --result_len;
    tmp_buf[result_len] = '\0';
    if (result_len == 0)
      goto fail;                /* Nothing but a newline. */
    /* A trailing ":0" means no line number was found.          */
    if (result_len >= 2
        && tmp_buf[result_len - 2] == ':' && tmp_buf[result_len - 1] == '0')
      {
        if (tmp_buf[0] == '?')
          goto fail;
        else
          {
            /* The function name tends to be better than what we get    */
            /* from backtrace_symbols.                                  */
            char *nl = strchr (tmp_buf, '\n');
            if (nl == NULL || nl == tmp_buf)
              goto fail;
            *nl = '\0';
            result_len = nl - tmp_buf;
          }
      }
    /* Get rid of embedded newline, if any.                         */
    /* Remove everything but last pathname component in the file.   */
    {
      char *nl = strchr (tmp_buf, '\n');
      char *slash;
      if (nl != NULL)
        {
          *nl = ':';
          slash = strrchr (nl, '/');
          if (0 != slash)
            {
              memmove (nl + 1, slash + 1, strlen (slash) /* incl. null */ );
              result_len -= (slash - nl);
            }
        }
    }
    /* We are finished shrinking the string.  Now copy it to its real   */
    /* destination.                                                     */
    if (result_len > ncols)
      {
        result_len = ncols;
        tmp_buf[result_len] = '\0';
      }
    strcpy(result_buf, tmp_buf);
    /* Columns reserved for " [<address>]".  Test ncols first: the      */
    /* subtraction below is unsigned and must not wrap around.          */
    addr_cols = (sizeof(char *) == 4)? 14: 22;
    if (ncols > addr_cols && result_len < ncols - addr_cols)
      {
        /* Add in hex address */
        snprintf (result_buf + result_len, ncols + 1 - result_len,
                  " [%p]", (void *) pc);
      }
    goto done;
  fail:
    {
      void *addr = (void *) pc;
      char **sym_name = backtrace_symbols (&addr, 1);

      if (sym_name == NULL || sym_name[0] == NULL)
        {
          /* backtrace_symbols could not allocate its result. */
          snprintf (result_buf, ncols + 1, "[%p]", (void *) pc);
        }
      else
        {
          char *name = strrchr (sym_name[0], '/');

          if (0 == name)
            {
              name = sym_name[0];
            }
          else
            {
              /* Use only the last component of the file name plus      */
              /* what follows.                                          */
              ++name;
            }
          snprintf (result_buf, ncols + 1, "%s", name);
        }
      free (sym_name);
    }
  }
done:
  {
    /* Pad with blanks to exactly ncols columns. */
    size_t result_len = strlen (result_buf);
    size_t i;

    for (i = result_len; i < ncols; ++i)
      {
        result_buf[i] = ' ';
      }
    result_buf[ncols] = '\0';
  }
  AO_fetch_and_sub1_release(&reentry_count);
}

/*
 * Routines to update and print the contents of a list of PC samples.
 * FIXME: Implement a variant for hsd_map.
 */

/* One recorded sample.  QPROF_buffer points at an array of             */
/* QPROF_buf_size of these, allocated in QPROF_pc_sample_list_init and  */
/* filled in by add_sample.                                             */
volatile struct prof_entry
{
  AO_T pc;      /* Sampled pc; readers skip entries where this is 0.    */
} *QPROF_buffer = 0;

/*
 * qsort comparison function for struct prof_entry: orders entries by
 * ascending pc.  Returns -1, 0 or 1.
 */
static int
prof_compare (const void *arg1, const void *arg2)
{
  const unsigned long lhs = ((struct prof_entry *) arg1)->pc;
  const unsigned long rhs = ((struct prof_entry *) arg2)->pc;

  /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1.    */
  return (lhs > rhs) - (lhs < rhs);
}

/* Capacity of QPROF_buffer, in entries.  May be overridden with the    */
/* QPROF_BUFFER_SIZE environment variable (see                          */
/* QPROF_pc_sample_list_init).                                          */
unsigned long QPROF_buf_size = 400000;

volatile AO_T QPROF_buf_pos = 0;  /* Next position in buffer to be used */

volatile AO_T QPROF_samples = 0;  /* Number of times we sampled.  Only  */
                          /* incremented once even if entire      */
                          /* call stack is sampled.         */

/* Set to 1 by add_sample once a sample did not fit into the buffer.    */
volatile AO_T QPROF_buffer_overflowed = 0;

/*
 * Record one pc value in the next free slot of QPROF_buffer.  If the
 * buffer is already full the sample is dropped and the overflow flag is
 * raised instead.
 */
static void
add_sample(unsigned long pc)
{
  /* Claim a slot.  Current write may not be visible when QPROF_buf_pos */
  /* is incremented.  Earlier writes are.                               */
  const unsigned long slot = AO_fetch_and_add1_release(&QPROF_buf_pos);

  if (slot >= QPROF_buf_size)
    {
      /* Pull the position back so that wrap-around is VERY unlikely.   */
      /* This effectively truncates the profile.                        */
      AO_store(&QPROF_buf_pos, QPROF_buf_size);
      AO_store(&QPROF_buffer_overflowed, 1);
      return;
    }
  AO_store(&(QPROF_buffer[slot].pc ), pc);
}


#if defined(__i386__)
  /* FIXME: This assumes SA_SIGINFO is not specified, which makes */
  /* this uglier than necessary.  Currently our patched libunwind       */
  /* makes the same assumption, so it's premature to fix this.          */
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) &si; \
    unsigned long pc = (AO_T)(sc->eip)
#elif defined(__ia64__)
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) scv; \
    unsigned long pc = (AO_T)(sc->sc_ip)
#elif defined(__hppa__)
# include <ucontext.h>
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) scv; \
    unsigned long pc = (AO_T)(sc->sc_iaoq[0]) & ~3 /* Remove PL bits */;
    /* FIXME: The above offset of 24 is an empirically derived hack,    */
    /* which appears to work on 32 bit kernels.  As I recall, we should */
    /* be using ucontext_t * as the type of the last argument.  But it  */
    /* appears hard to extract iaoq[0] from that.                 */
#else
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) scv; \
    unsigned long pc = (AO_T)(sc->sc_pc)
#endif

/*
 * Signal handler that records only the interrupted pc: one buffer entry
 * and one sample count per signal.  When sampling is driven by hardware
 * events, the counter is restarted before returning.
 */
void
QPROF_pc_sample_list_handler (int signo, siginfo_t * si, void *scv)
{
  SET_PC;       /* Declares pc, taken from the signal context. */

  add_sample(pc);
  AO_fetch_and_add1(&QPROF_samples);
  if (source == HW_EVENT)
    {
      QPROF_restart_hw_event(si);
    }
}

#ifndef NO_UNWIND
/*
 * Signal handler that records the whole call stack of the interrupted
 * code.  The stack is walked with libunwind starting in this handler;
 * frames are ignored up to and including the signal frame, and the ip of
 * every later frame becomes one buffer entry.  The sample count is
 * incremented once per signal.  When sampling is driven by hardware
 * events, the counter is restarted before returning.
 */
void
QPROF_pc_sample_list_stack_handler (int signo, siginfo_t * si, void *scv)
{
  unw_context_t context;
  unw_cursor_t cursor;
  int past_signal_frame = 0;

  if (0 != unw_getcontext(&context))
    QPROF_error("unw_getcontext failed");
  if (0 != unw_init_local(&cursor, &context))
    QPROF_error("unw_init_local failed");
  for (;;)
    {
      if (!past_signal_frame)
        {
          past_signal_frame = (unw_is_signal_frame(&cursor) > 0);
        }
      else
        {
          unw_word_t frame_ip;

          if (0 != unw_get_reg(&cursor, UNW_REG_IP, &frame_ip))
            QPROF_error("unw_get_reg failed");
          add_sample(frame_ip);
        }
      /* Move on to the caller's frame; stop at the outermost one. */
      if (unw_step(&cursor) <= 0)
        break;
    }
  AO_fetch_and_add1(&QPROF_samples);
  if (source == HW_EVENT)
    {
      QPROF_restart_hw_event(si);
    }
}
#endif /* !NO_UNWIND */

/*
 * Allocate the sample buffer with sbrk.
 *
 * The number of entries is QPROF_buf_size, which is first replaced by
 * the value of the QPROF_BUFFER_SIZE environment variable if that is
 * set.  An unacceptable size or a failing sbrk is reported through
 * QPROF_error.
 */
void QPROF_pc_sample_list_init()
{
  char *buf_size_string = getenv ("QPROF_BUFFER_SIZE");

  if (NULL != buf_size_string)
    {
      /* strtol, unlike atoi, has defined behavior for values that do   */
      /* not fit in an int, so large buffers can be requested.          */
      long tmp = strtol (buf_size_string, NULL, 10);

      if (tmp >= 10
          && (unsigned long) tmp < LONG_MAX / sizeof (struct prof_entry))
        {
          QPROF_buf_size = tmp;
        }
      else
        QPROF_error("QPROF_BUFFER_SIZE must be >= 10, < address space");
    }
  QPROF_buffer = sbrk (QPROF_buf_size * sizeof (struct prof_entry));
  if ((void *) (-1) == QPROF_buffer)
    QPROF_error("Sbrk for QPROF_buffer failed");
}

/* Character attribute value used in the escape sequence that starts    */
/* profile output.  -1 means no color change.  Conceivably this could   */
/* really be a font change etc.                                         */
static int QPROF_color = -1;

/* Character attribute used to change the color back; currently a       */
/* guess.  -1 means there is no need to change back.                    */
static int QPROF_undo_color = -1;

/* Nonzero once QPROF_COLOR has been examined. */
static int QPROF_color_initialized = 0;

/*
 * Switch output on f to the color requested by the QPROF_COLOR
 * environment variable, if any.
 *
 * QPROF_COLOR may be "blue", "red", "green", or a numeric attribute;
 * numbers outside 1..59 are replaced by 34 (blue foreground).  The
 * variable is examined on the first call only.  Nothing is written when
 * no color was requested.
 */
void QPROF_start_color(FILE *f)
{
  if (!QPROF_color_initialized)
    {
      const char *requested = getenv ("QPROF_COLOR");

      if (requested != 0)
        {
          static const struct
            {
              const char *name;
              int attribute;
            }
          known_colors[] =
            {
              { "blue", 34 },
              { "red", 31 },
              { "green", 32 }
            };
          const size_t n_known = sizeof (known_colors) / sizeof (known_colors[0]);
          size_t k;

          for (k = 0; k < n_known; ++k)
            {
              if (strcmp(requested, known_colors[k].name) == 0)
                break;
            }
          if (k < n_known)
            {
              QPROF_color = known_colors[k].attribute;
            }
          else
            {
              QPROF_color = atoi(requested);
              if (QPROF_color < 1 || QPROF_color >= 60)
                QPROF_color = 34;       /* blue fg */
            }

          /* Pick the attribute that undoes the chosen one. */
          if (QPROF_color >= 1 && QPROF_color <= 9)
            QPROF_undo_color = 0;       /* default mode     */
          else if (QPROF_color >= 11 && QPROF_color <= 19)
            QPROF_undo_color = 10;      /* primary font     */
          else if (QPROF_color >= 30 && QPROF_color <= 38)
            QPROF_undo_color = 39;      /* default fg QPROF_color */
          else if (QPROF_color >= 40 && QPROF_color <= 48)
            QPROF_undo_color = 49;      /* default bg QPROF_color */
        }
      QPROF_color_initialized = 1;
    }

  if (-1 == QPROF_color)
    return;
  fprintf(f, "\033[%dm", QPROF_color);
}

/*
 * Undo the effect of QPROF_start_color on f by writing the escape
 * sequence for QPROF_undo_color.  Writes nothing when there is nothing
 * to undo (QPROF_undo_color is -1).
 */
void QPROF_end_color(FILE *f)
{
  if (-1 == QPROF_undo_color)
    return;
  fprintf(f, "\033[%dm", QPROF_undo_color);
}

/* One file-backed mapping read from /proc/self/maps. */
struct map
  {
    struct map *next;           /* Next mapping in file order, or NULL. */
    unsigned long long start;   /* First address of the mapping.        */
    unsigned long long end;     /* Address just past the mapping.       */
    unsigned long long offset;  /* Offset of the mapping in the file.   */
    const char *path;           /* Heap-allocated copy of the path.     */
  };

/* Argument handed to QPROF_write_object_info via dl_iterate_phdr. */
struct callback_info
  {
    struct map *maps;           /* List built by QPROF_get_maps.        */
    FILE *fp;                   /* Stream the object info is written to. */
  };

/*
 * Read /proc/self/maps and return a linked list with one struct map for
 * every line that names a path and is not completely inaccessible
 * (permissions starting with "---").  The list is in file order.
 *
 * Returns NULL if the file cannot be opened or contains no such line.
 * If memory runs out, a message is printed and the list built so far is
 * returned.  The caller owns the list and each path string.
 */
static struct map*
QPROF_get_maps (void)
{
  FILE *fp = fopen ("/proc/self/maps", "r");
  struct map *m, *list = NULL, *list_end = NULL;
  char *line = NULL;
  size_t line_size = 0;

  if (fp == NULL)
    return NULL;

  while (getline (&line, &line_size, fp) > 0)
    {
      unsigned long long start, end, offset;
      /* Each %Ns conversion stores up to N characters plus a null, so  */
      /* the field widths are one less than the buffer sizes.           */
      char path[4096], perm[5];

      if (sscanf (line, "%llx-%llx %4s %llx %*x:%*x %*u %4095s\n",
                  &start, &end, perm, &offset, path) != 5)
        continue;               /* e.g. a mapping without a path */

      if (strncmp (perm, "---", 3) == 0)
        /* ignore inaccessible mappings */
        continue;

      m = malloc (sizeof (*m));
      if (!m)
        {
          fprintf (stderr, "qprof: failed to allocate %zu bytes (%s)\n",
                   sizeof (*m), strerror (errno));
          break;                /* return what we have... */
        }

      memset (m, 0, sizeof (*m));
      m->start = start;
      m->end = end;
      m->offset = offset;
      m->path = strdup (path);
      if (!m->path)
        {
          /* Users of the list dereference path, so drop this entry. */
          fprintf (stderr, "qprof: failed to allocate %zu bytes (%s)\n",
                   strlen (path) + 1, strerror (errno));
          free (m);
          break;                /* return what we have... */
        }
      m->next = NULL;
      if (list_end)
        {
          list_end->next = m;
          list_end = m;
        }
      else
        list = list_end = m;
    }
  /* Reached on every path out of the loop, so nothing is leaked. */
  free (line);
  fclose (fp);
  return list;
}

/*
 * dl_iterate_phdr callback: write a "(q:object ...)" description of one
 * loaded object to the stream in the struct callback_info that `data'
 * points to.
 *
 * The object's file is taken to be the path of the mapping that contains
 * its program headers; every mapping with that path is listed under
 * q:maps.  If no such mapping is found only the name is written.
 *
 * Returns 0 so that iteration continues, or -1 (which stops it) if
 * `size' is smaller than the struct dl_phdr_info this code was compiled
 * for.
 */
static int
QPROF_write_object_info (struct dl_phdr_info *info, size_t size, void *data)
{
  struct callback_info *ci = data;
  const char *path, *name;
  FILE *ofp = ci->fp;
  char buf[PATH_MAX];
  struct map *m;
  int first = 1;

  if (size < sizeof (struct dl_phdr_info))
    return -1;

  /* An empty name is used for the main program. */
  if (info->dlpi_name != NULL && info->dlpi_name[0])
    path = info->dlpi_name;
  else
    path = QPROF_get_exe_name ();
  if (path == NULL)
    path = "";                  /* Executable name is unknown. */

  name = strrchr (path, '/');
  if (name)
    ++name;
  else
    name = path;

  /* From here on path is that of the mapping holding the headers. */
  path = "";
  for (m = ci->maps; m; m = m->next)
    if (m->start <= (unsigned long long) (size_t) info->dlpi_phdr
        && m->end > (unsigned long long) (size_t) info->dlpi_phdr)
      {
        path = m->path;
        break;
      }

  fprintf (ofp, "(q:object '((q:name . \"%s\")\n", name);

  if (!path[0])
    {
      /* No file is known; still close the list and the form opened     */
      /* above so that the output stays balanced.                       */
      fprintf (ofp, "))\n");
      return 0;
    }

  for (m = ci->maps; m; m = m->next)
    {
      if (m->path[0] != path[0] || strcmp (m->path, path) != 0)
        continue;

      if (first)
        {
          first = 0;
          q_checksummed_link (Q_LINK_ANY, buf, sizeof (buf), name, path);
          fprintf (ofp,
                   "            (q:file . \"%s\")\n"
                   "            (q:maps . (",
                   buf);
        }
      else
        fprintf (ofp, "\n                        ");
      fprintf (ofp, "((q:addr . #x%llx) (q:size . %llu) "
               "(q:offset . #x%llx))",
               m->start, m->end - m->start, m->offset);
    }
  if (!first)
    fprintf (ofp, "))");
  fprintf (ofp, "))\n");
  return 0;
}

void
QPROF_pc_sample_list_write_q_profile (void)
{
    unsigned long my_buf_size = AO_load_acquire_read(&QPROF_buf_pos);
    char buf[PATH_MAX], *exe_path, *prog_name;
    struct map *maps, *m, *m_next;
    const char *hist_file_name;
    unsigned long count = 0;
    FILE *info, *hist, *fp;
    pid_t pid = getpid ();
    unsigned long i;

    exe_path = QPROF_get_exe_name ();
    prog_name = strrchr (exe_path, '/');
    if (prog_name)
      ++prog_name;
    else
      prog_name = exe_path;

    snprintf (buf, sizeof (buf), "%s-pid%d.info", prog_name, pid);
    info = q_create_file (buf, sizeof (buf));
    if (!info)
      {
      fprintf (stderr, "qprof: couldn't create info file `%s'\n", buf);
      return;
      }

    /* dump the command-line to the info file: */
    fprintf (info, "(q:info '((q:cmdline . (");
    fp = fopen ("/proc/self/cmdline", "r");
    if (fp)
      {
      char *line = NULL;
      size_t line_size = 0;
      int not_first = 0;

      while (!feof (fp))
        {
          if (getdelim (&line, &line_size, '\0', fp) >= 0)
            {
            if (not_first)
              fputc (' ', info);
            fprintf (info, "\"%s\"", line);
            not_first = 1;
            }
        }
      if (line)
        free (line);
      fclose (fp);
      }
    fprintf (info, "))))\n");

    /* dump object info: */
    maps = QPROF_get_maps ();
    if (maps)
      {
      struct callback_info ci;
      ci.maps = maps;
      ci.fp = info;
      dl_iterate_phdr (QPROF_write_object_info, &ci);
      for (m = maps; m; m = m_next)
        {
          m_next = m->next;
          if (m->path)
            free ((void *) m->path);
          free (m);
        }
      }

    snprintf (buf, sizeof (buf), "%s-pid%d.hist", prog_name, pid);
    hist = q_create_file (buf, sizeof (buf));
    if (!hist)
      {
      fprintf (stderr, "qprof: couldn't create histogram file\n");
      return;
      }
    hist_file_name = strdup (buf);

    fprintf (info,
           "(q:histogram '((q:file . \"%s\")\n"
           "                (q:x-unit-label . \"address\")",
           hist_file_name);
    if (source == TIMER)
      {
      fprintf (info, "\n"
             "                (q:event-name . \"%s\")\n"
             "                (q:y-unit-label . \"seconds\")\n"
             "                (q:y-unit-conversion-factor . %g)\n"
             "                (q:y-granularity . %g)",
             (real_time ? "time" : "virtual-time"),
             interval * 1e-6, interval * 1e-6);
      }
    else
      {
      const char *event_name = getenv ("QPROF_HW_EVENT");

      /* XXX make this more general and independent of Itanium PMU... */
      if (strcmp (event_name, "CPU_CYCLES") == 0)
        fprintf (info, "\n"
               "                (q:y-unit-label . \"seconds\")");
      fprintf (info, "\n"
             "                (q:event-name . \"%s\")\n"
             "                (q:y-granularity . %ld)\n",
             event_name, interval);
      }
    fprintf (info, "))\n");

    qsort ((struct prof_entry *) QPROF_buffer, my_buf_size,
         sizeof (struct prof_entry), prof_compare);
    for (i = 0; i < my_buf_size; ++i) {
      if (AO_load(&QPROF_buffer[i].pc) == 0)
          /* This can happen if a signal handler is still running */
          continue;
      ++count;
      if (i + 1 < my_buf_size
          && prof_compare((const void *) (QPROF_buffer + i),
                      (const void *) (QPROF_buffer + i + 1)) == 0)
          continue;
      fprintf (hist, "0x%lx %lu\n", QPROF_buffer[i].pc, count);
      count = 0;
    }

    q_close (hist);
    q_close (info);
}

void
QPROF_pc_sample_list_print_profile (FILE * f)
{
  unsigned long my_buf_size = AO_load_acquire_read(&QPROF_buf_pos);
  size_t i;
  unsigned long repeated = 0;
  char *summary_level = getenv ("QPROF_GRANULARITY");
  int per_function = 0, per_instruction = 0, per_line = 1;
  char *ncols_text = getenv ("QPROF_NCOLS");
# define MAX_NCOLS 1024
# define MIN_NCOLS 30
# define DEFAULT_NCOLS 64
  int ncols = DEFAULT_NCOLS;
  char prev_text[MAX_NCOLS + 1];
  char curr_text[MAX_NCOLS + 1];
  unsigned long nsamples;
  
  if (HW_EVENT == source)
    QPROF_stop_hw_event();

  QPROF_start_color(f);
  if (ncols_text != 0)
    {
      ncols = atoi(ncols_text);
      if (ncols < MIN_NCOLS)
      {
        fprintf(f, "Setting QPROF_NCOLS to minimum of %d\n", MIN_NCOLS);
        ncols = MIN_NCOLS;
      }
      if (ncols > MAX_NCOLS)
      {
        fprintf(f, "Setting QPROF_NCOLS to maximum of %d\n", MAX_NCOLS);
        ncols = MAX_NCOLS;
      }
    }
  {
    char * exe_name = QPROF_get_exe_name ();
    char buf[100];

    if (0 == QPROF_get_exe_name ()) {
      /* Use pid instead.     */
      sprintf(buf, "pid%d", getpid());
      exe_name = buf;
    }
    nsamples = (unsigned long)AO_load(&QPROF_samples);
    fprintf (f, "qprof: %s: %lu samples, %lu counts\n",
           exe_name, nsamples, my_buf_size);
  }
  /* New entries may still be added; we ignore them.      */
  if (AO_load(&QPROF_buffer_overflowed))
    fprintf (f, "qprof: WARNING: buffer overflowed, "
           "samples were dropped.  " "Try setting QPROF_BUFFER_SIZE.\n");
  qsort ((struct prof_entry *) QPROF_buffer, my_buf_size,
       sizeof (struct prof_entry), prof_compare);

  if (0 != summary_level)
    {
      per_instruction = (strcmp (summary_level, "instruction") == 0);
      per_function = (strcmp (summary_level, "function") == 0);
      per_line = (!per_instruction && !per_function);
      if (per_line && strcmp (summary_level, "line") != 0)
        {
          fprintf (f, "qprof: WARNING: QPROF_GRANULARITY should be instruction,"
                 " line, or function.  Assuming line.");
        }
    }
  prev_text[0] = '\0';
  for (i = 0; i < my_buf_size; ++i)
    {
      if (AO_load(&(QPROF_buffer[i].pc)) == 0)
      {
        /* This can happen if a signal handler is still running   */
        continue;
      }
      if (i != 0
        && prof_compare ((const void *) (QPROF_buffer + i - 1),
                     (const void *) (QPROF_buffer + i)) == 0)
      {
        ++repeated;
        continue;
      }
      QPROF_format_pc (curr_text, AO_load(&(QPROF_buffer[i].pc)), ncols);
      /* If we want line or function granularity, we take advantage     */
      /* of the fact that that a prefix of the textual pc         */
      /* PC representation contains exactly enough text to distinguish  */
      /* samples that should be considered different.  We find that     */
      /* prefix, remove the stuff we don't want to see, and merge with  */
      /* the previous entry if the remaining textual description is     */
      /* identical.  This is all a quick-and-dirty hack, but it   */
      /* works ...                                          */
      if (per_line || per_function)
      {
        char *delim = strrchr (curr_text, '+');

        if (0 == delim)
          {
            if (per_line)
            {
              delim = strchr (curr_text, ' ');
            }
            else
            {
              delim = strrchr (curr_text, ':');
            }
          }
        else
          {
            *delim = ')';
            ++delim;
          }
        if (0 != delim)
          {
            char *p;
            for (p = delim; p < curr_text + ncols; ++p)
            *p = ' ';
          }
        if (delim != 0
            && strncmp (prev_text, curr_text, delim - curr_text) == 0)
          {
            ++repeated;
            continue;
          }
      }
      if (i != 0)
      {
        fprintf (f, "%s %lu\t(%3ld%%)\n", prev_text, repeated,
                                  (100*repeated+nsamples/2)/nsamples);
      }
      strcpy (prev_text, curr_text);
      repeated = 1;
    }
  if (repeated != 0)
    fprintf (f, "%s %lu\t(%3ld%%)\n", prev_text, repeated,
                              (100*repeated+nsamples/2)/nsamples);

  QPROF_end_color(f);
}

/* Generated by Doxygen 1.6.0 */