[PATCH v2] gas: add .cv_ucomp and .cv_scomp pseudo-directives

Jan Beulich jbeulich@suse.com
Tue Nov 5 14:31:17 GMT 2024
On 04.11.2024 22:21, Mark Harmstone wrote:
> @@ -5457,6 +5464,166 @@ s_leb128 (int sign)
>    demand_empty_rest_of_line ();
>  }
>  

> +#ifdef TE_PE
> +
> +/* Output a compressed CodeView integer.  The return value is the number of
> +   bytes used.  */
> +
> +unsigned int
> +output_cv_comp (char *p, offsetT value, int sign)
> +{
> +  char *orig = p;
> +
> +  if (sign)
> +    {
> +      if (value >= 0)
> +	value <<= 1;
> +      else
> +	value = (-value << 1) | 1;

This will badly degenerate when the incoming value is the biggest possible
negative integer. If I'm not mistaken, you'd convert it to -0 here, which
would then happily be emitted below, rather than triggering the warning.

> +    }
> +
> +  if (value <= 0x7f)
> +    {
> +      *p++ = value;
> +    }
> +  else if (value <= 0x3fff)
> +    {
> +      *p++ = 0x80 | (value >> 8);
> +      *p++ = value & 0xff;
> +    }
> +  else if (value <= 0x1fffffff)
> +    {
> +      *p++ = 0xc0 | (value >> 24);
> +      *p++ = (value >> 16) & 0xff;
> +      *p++ = (value >> 8) & 0xff;
> +      *p++ = value & 0xff;
> +    }
> +  else
> +    {
> +      if (sign)
> +	as_warn (_("value cannot be expressed as a .cv_scomp"));
> +      else
> +	as_warn (_("value cannot be expressed as a .cv_ucomp"));

Imo better

      as_warn (_("value cannot be expressed as a .cv_%ccomp", sign ? 's' : 'u'));

as .cv_?comp isn't subject to translation anyway.

Plus - is a warning really enough here?

And then - would the two non-static routines here maybe better live in
codeview.c?

> +/* Generate the appropriate fragments for a given expression to emit a
> +   cv_comp value.  SIGN is 1 for cv_scomp, 0 for cv_ucomp.  */
> +
> +static void
> +emit_cv_comp_expr (expressionS *exp, int sign)
> +{
> +  operatorT op = exp->X_op;
> +
> +  if (op == O_absent || op == O_illegal)
> +    {
> +      as_warn (_("zero assumed for missing expression"));
> +      exp->X_add_number = 0;
> +      op = O_constant;
> +    }
> +  else if (op == O_big)
> +    {
> +      as_bad (_("number invalid"));
> +      exp->X_add_number = 0;
> +      op = O_constant;
> +    }
> +  else if (op == O_register)
> +    {
> +      as_warn (_("register value used as expression"));
> +      op = O_constant;
> +    }
> +  if (now_seg == absolute_section)

Like emit_leb128_expr() has, please have a blank line above here.

> +    {
> +      if (op != O_constant || exp->X_add_number != 0)
> +	as_bad (_("attempt to store value in absolute section"));
> +      abs_section_offset++;
> +      return;
> +    }
> +
> +  if ((op != O_constant || exp->X_add_number != 0) && in_bss ())
> +    as_bad (_("attempt to store non-zero value in section `%s'"),
> +	    segment_name (now_seg));
> +
> +  /* Let the backend know that subsequent data may be byte aligned.  */
> +#ifdef md_cons_align
> +  md_cons_align (1);
> +#endif
> +
> +  if (op == O_constant)
> +    {
> +      offsetT value = exp->X_add_number;
> +      unsigned int size;
> +      char *p;
> +
> +      /* If we've got a constant, emit the thing directly right now.  */
> +
> +      if (value < 0 && !sign)
> +	as_bad (_("cannot pass a negative value to .cv_ucomp"));

No such check exists for .sleb128 / .uleb128 handling. Signed-ness
of exp->X_add_number is, I'm afraid, more difficult to determine. See
X_unsigned and X_extrabit. Yet perhaps easiest if you simply drop the
check, without much investigation.

> +      size = sizeof_cv_comp (value, sign);
> +      p = frag_more (size);
> +      if (output_cv_comp (p, value, sign) > size)
> +	abort ();
> +    }
> +  else
> +    {
> +      /* Otherwise, we have to create a variable sized fragment and
> +	 resolve things later.  */
> +
> +      frag_var (rs_cv_comp, 4, 0, sign, make_expr_symbol (exp),
> +		0, (char *) NULL);

While emit_leb128_expr() has such a cast, I think in new (including
cloned) code we'd be better off omitting unnecessary casts. Every
single one comes with a certain risk (and sets another bad precedent).

> --- /dev/null
> +++ b/gas/testsuite/gas/pe/cv_comp.d
> @@ -0,0 +1,14 @@
> +#objdump: -s -j .text
> +#name: CodeView compressed integer test
> +
> +.*: .*
> +
> +Contents of section .text:
> + 0000 21002101 212a217f 21808021 853921bf  .*
> + 0010 ff21c000 400021c0 0f424021 dfffffff  .*
> + 0020 21002102 21542180 fe218100 218a7221  .*
> + 0030 c0007ffe 21c00080 0021c01e 848021df  .*
> + 0040 fffffe21 03215521 80ff2181 01218a73  .*
> + 0050 21c0007f ff21c000 800121c0 1e848121  .*
> + 0060 dfffffff 21022104 21900421 04210821  .*
> + 0070 a0082105 210921a0 0921.*

Just to double check - you tested this for all PE-capable targets, and
it passed everywhere?

> --- /dev/null
> +++ b/gas/testsuite/gas/pe/cv_comp.s
> @@ -0,0 +1,95 @@
> +	.text

Maybe better to emit to .data or .rdata?

> --- a/gas/write.c
> +++ b/gas/write.c
> @@ -513,6 +513,37 @@ cvt_frag_to_fill (segT sec ATTRIBUTE_UNUSED, fragS *fragP)
>        break;
>  #endif
>  
> +#ifdef TE_PE
> +    case rs_cv_comp:
> +      {
> +	offsetT value = S_GET_VALUE (fragP->fr_symbol);
> +	int size;
> +
> +	if (!S_IS_DEFINED (fragP->fr_symbol))
> +	  {
> +	    as_bad_where (fragP->fr_file, fragP->fr_line,
> +			  _("cv_comp operand is an undefined symbol: %s"),

Imo such a diagnostic should properly mention the exact original directive.
I.e. including the leading dot and including the 's' or 'u'.

> +			  S_GET_NAME (fragP->fr_symbol));
> +	  }
> +
> +	if (fragP->fr_subtype == 0 && value < 0)
> +	  {
> +	    as_bad_where (fragP->fr_file, fragP->fr_line,
> +			  _("cannot pass a negative value to .cv_ucomp"));
> +	  }

See the other comment regarding (un)signed-ness.

> @@ -2785,6 +2816,12 @@ relax_segment (struct frag *segment_frag_root, segT segment, int pass)
>  	  address += sframe_estimate_size_before_relax (fragP);
>  	  break;
>  
> +#ifdef TE_PE
> +	case rs_cv_comp:
> +	  address += fragP->fr_offset = 1;
> +	  break;
> +#endif

First I meant to ask that you replicate or at least reference the comment
from rs_leb128 handling. Yet then - why don't you simply add the case label
there, re-using code and comment?

> @@ -3132,6 +3169,20 @@ relax_segment (struct frag *segment_frag_root, segT segment, int pass)
>  		growth = sframe_relax_frag (fragP);
>  		break;
>  
> +#ifdef TE_PE
> +	      case rs_cv_comp:
> +		{
> +		  valueT value;
> +		  offsetT size;
> +
> +		  value = resolve_symbol_value (fragP->fr_symbol);
> +		  size = sizeof_cv_comp (value, fragP->fr_subtype);
> +		  growth = size - fragP->fr_offset;
> +		  fragP->fr_offset = size;
> +		}
> +	      break;
> +#endif

Here I wonder if this wasn't neater if put right below rs_leb128 handling,
to make similarities / differences easier to spot. And for people touching
one to also notice it, (hopefully) forcing them to think about whether the
other one also needs touching.

Jan


More information about the Binutils mailing list