[PATCH 2/2] gprof: fix hist rounding scale calculation

Richard Allen rsaxvc@gmail.com
Wed Jun 4 02:46:19 GMT 2025
Use 1-byte wide bins. This reduces inaccuracies on
systems with odd-byte instruction widths. (Xtensa and
x86/AMD64 for me). This addresses the root cause
behind the 2effb0d11f workaround.

Signed-off-by: Richard Allen <rsaxvc@gmail.com>
---
 gprof/cg_print.c |  4 ++--
 gprof/gmon_io.c  |  7 +++----
 gprof/hist.c     | 54 ++++++++++++++++++++++--------------------------
 gprof/symtab.h   |  2 +-
 4 files changed, 31 insertions(+), 36 deletions(-)

diff --git a/gprof/cg_print.c b/gprof/cg_print.c
index c8e80d9d3a5..d3e8fb41dbc 100644
--- a/gprof/cg_print.c
+++ b/gprof/cg_print.c
@@ -76,8 +76,8 @@ print_header (void)
 	printf (_("\t\t\tCall graph\n\n"));
     }
 
-  printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
-	  (long) hist_scale * (long) sizeof (UNIT));
+  printf (_("\ngranularity: each sample hit covers %.1f byte(s)"),
+	  hist_scale);
 
   if (print_time > 0.0)
     printf (_(" for %.2f%% of %.2f seconds\n\n"),
diff --git a/gprof/gmon_io.c b/gprof/gmon_io.c
index 36ba7987a27..48db194f503 100644
--- a/gprof/gmon_io.c
+++ b/gprof/gmon_io.c
@@ -467,7 +467,7 @@ gmon_out_read (const char *filename)
 	}
 
       samp_bytes = tmp.ncnt - header_size;
-      hist_num_bins = samp_bytes / sizeof (UNIT);
+      hist_num_bins = samp_bytes;
       if (histograms && (tmp.low_pc != histograms->lowpc
 			 || tmp.high_pc != histograms->highpc
 			 || (hist_num_bins != histograms->num_bins)))
@@ -484,8 +484,7 @@ gmon_out_read (const char *filename)
 	  histograms->lowpc = tmp.low_pc;
 	  histograms->highpc = tmp.high_pc;
 	  histograms->num_bins = hist_num_bins;
-	  hist_scale = (double)((tmp.high_pc - tmp.low_pc) / sizeof (UNIT))
-	    / hist_num_bins;
+	  hist_scale = (double)(tmp.high_pc - tmp.low_pc) / hist_num_bins;
 	  histograms->sample = (int *) xmalloc (hist_num_bins * sizeof (int));
 	  memset (histograms->sample, 0,
 		  hist_num_bins * sizeof (int));
@@ -675,7 +674,7 @@ gmon_out_write (const char *filename)
       if (gmon_io_write_vma (ofp, histograms->lowpc)
           || gmon_io_write_vma (ofp, histograms->highpc)
           || gmon_io_write_32 (ofp, histograms->num_bins
-			       * sizeof (UNIT) + hdrsize))
+			       + hdrsize))
 	{
 	  perror (filename);
 	  done (1);
diff --git a/gprof/hist.c b/gprof/hist.c
index ef4756531c6..716dae2d1ef 100644
--- a/gprof/hist.c
+++ b/gprof/hist.c
@@ -34,9 +34,7 @@
 #include "stdio.h"
 #include "stdlib.h"
 
-#define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
-
-static void scale_and_align_entries (void);
+static void align_entries (void);
 static void print_header (int);
 static void print_line (Sym *, double);
 static int cmp_time (const void *, const void *);
@@ -109,8 +107,7 @@ read_histogram_header (histogram *record,
       done (1);
     }
 
-  n_hist_scale = (double)(record->highpc - record->lowpc) / sizeof (UNIT)
-    / record->num_bins;
+  n_hist_scale = (double)(record->highpc - record->lowpc) / record->num_bins;
 
   if (first)
     {
@@ -152,7 +149,7 @@ read_histogram_header (histogram *record,
 	 there's code (notably printing code), that prints units,
 	 and it would be very confusing to have one unit mean different
 	 things for different functions.  */
-      if (fabs (hist_scale - n_hist_scale) > 0.000001)
+      if (fabs (hist_scale - n_hist_scale) > 0.000002)
 	{
 	  fprintf (stderr,
 		   _("%s: different scales in histogram records: %f != %f\n"),
@@ -289,7 +286,7 @@ hist_write_hist (FILE * ofp, const char *filename)
    next bin.  */
 
 static void
-scale_and_align_entries (void)
+align_entries (void)
 {
   Sym *sym;
   bfd_vma bin_of_entry;
@@ -300,21 +297,21 @@ scale_and_align_entries (void)
     {
       histogram *r = find_histogram_for_pc (sym->addr);
 
-      sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
+      sym->hist.addr = sym->addr;
 
       if (r)
 	{
-	  bin_of_entry = (sym->hist.scaled_addr - r->lowpc) / hist_scale;
-	  bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - r->lowpc)
+	  bin_of_entry = (sym->hist.addr - r->lowpc) / hist_scale;
+	  bin_of_code = ((sym->hist.addr + offset_to_code - r->lowpc)
 		     / hist_scale);
 	  if (bin_of_entry < bin_of_code)
 	    {
 	      DBG (SAMPLEDEBUG,
-		   printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
-			   (unsigned long) sym->hist.scaled_addr,
-			   (unsigned long) (sym->hist.scaled_addr
-					    + UNITS_TO_CODE)));
-	      sym->hist.scaled_addr += UNITS_TO_CODE;
+		   printf ("[align_entries] pushing 0x%lx to 0x%lx\n",
+			   (unsigned long) sym->hist.addr,
+			   (unsigned long) (sym->hist.addr
+					    + offset_to_code)));
+	      sym->hist.addr += offset_to_code;
 	    }
 	}
     }
@@ -351,12 +348,11 @@ scale_and_align_entries (void)
 
    For the VAX we assert that samples will never fall in the first two
    bytes of any routine, since that is the entry mask, thus we call
-   scale_and_align_entries() to adjust the entry points if the entry
-   mask falls in one bin but the code for the routine doesn't start
-   until the next bin.  In conjunction with the alignment of routine
-   addresses, this should allow us to have only one sample for every
-   four bytes of text space and never have any overlap (the two end
-   cases, above).  */
+   align_entries() to adjust the entry points if the entry mask falls
+   in one bin but the code for the routine doesn't start until the
+   next bin.  In conjunction with the alignment of routine addresses,
+   this should allow us to have only one sample for every four bytes
+   of text space and never have any overlap (the two end cases, above).*/
 
 static void
 hist_assign_samples_1 (histogram *r)
@@ -369,7 +365,7 @@ hist_assign_samples_1 (histogram *r)
   double count_time, credit;
   Sym_Table *symtab = get_symtab ();
 
-  bfd_vma lowpc = r->lowpc / sizeof (UNIT);
+  bfd_vma lowpc = r->lowpc;
 
   /* Iterate over all sample bins.  */
   for (i = 0, k = 1; i < r->num_bins; ++i)
@@ -385,8 +381,8 @@ hist_assign_samples_1 (histogram *r)
       DBG (SAMPLEDEBUG,
 	   printf (
       "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
-		    (unsigned long) (sizeof (UNIT) * bin_low_pc),
-		    (unsigned long) (sizeof (UNIT) * bin_high_pc),
+		    (unsigned long) (bin_low_pc),
+		    (unsigned long) (bin_high_pc),
 		    bin_count));
       total_time += count_time;
 
@@ -396,8 +392,8 @@ hist_assign_samples_1 (histogram *r)
 	 and J will never be less than 0.  */
       for (j = k - 1; j < symtab->len; k = ++j)
 	{
-	  sym_low_pc = symtab->base[j].hist.scaled_addr;
-	  sym_high_pc = symtab->base[j + 1].hist.scaled_addr;
+	  sym_low_pc = symtab->base[j].hist.addr;
+	  sym_high_pc = symtab->base[j + 1].hist.addr;
 
 	  /* If high end of bin is below entry address,
 	     go for next bin.  */
@@ -417,7 +413,7 @@ hist_assign_samples_1 (histogram *r)
 		   printf (
 	       "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
 			   (unsigned long) symtab->base[j].addr,
-			   (unsigned long) (sizeof (UNIT) * sym_high_pc),
+			   (unsigned long) sym_high_pc,
 			   symtab->base[j].name, overlap * count_time / hist_scale,
 			   (long) overlap));
 
@@ -451,7 +447,7 @@ hist_assign_samples (void)
 {
   unsigned i;
 
-  scale_and_align_entries ();
+  align_entries ();
 
   for (i = 0; i < num_histograms; ++i)
     hist_assign_samples_1 (&histograms[i]);
@@ -470,7 +466,7 @@ print_header (int prefix)
   if (bsd_style_output)
     {
       printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
-	      (long) hist_scale * (long) sizeof (UNIT));
+	      (long) hist_scale);
       if (total_time > 0.0)
 	{
 	  printf (_(" for %.2f%% of %.2f %s\n\n"),
diff --git a/gprof/symtab.h b/gprof/symtab.h
index 19031b917b0..40b9b3ce96d 100644
--- a/gprof/symtab.h
+++ b/gprof/symtab.h
@@ -67,7 +67,7 @@ typedef struct sym
     struct
       {
 	double time;		/* (Weighted) ticks in this routine.  */
-	bfd_vma scaled_addr;	/* Scaled entry point.  */
+	bfd_vma addr;	/* Entry point.  */
       }
     hist;
 
-- 
2.39.5



More information about the Binutils mailing list