[PATCH v9 04/19] gas: implement parsing of object attributes v2

Jan Beulich jbeulich@suse.com
Fri Oct 24 14:19:22 GMT 2025
On 01.09.2025 18:56, Matthieu Longo wrote:
> --- a/bfd/elf-attrs.c
> +++ b/bfd/elf-attrs.c
> @@ -255,8 +255,147 @@ bfd_elf_set_obj_attr_contents (bfd *abfd, bfd_byte *buffer, bfd_vma size)
>    write_obj_attr_section_v1 (abfd, buffer, size);
>  }
>  
> +/* The first two tags in gnu-testing namespace are known, and so have a name and
> +   can be initialized to the default value ('0' or NULL) depending on the
> +   encoding specified on the subsection.  Any tags above 1 will be considered
> +   unknown, so will be default initialized in the same way but its status will
> +   be set to obj_attr_subsection_v2_unknown.  */
> +static const obj_attr_info_t known_attrs_gnu_testing[] =
> +{
> +  {
> +    .tag = {"GNUTestTag_0", 0},
> +    .default_value.uint_val = 0,
> +  },
> +  {
> +    .tag = {"GNUTestTag_1", 1},
> +    .default_value.uint_val = 0,
> +  },
> +};

While discussing v8 you confirmed that these aren't part of the spec. Afaict the
comment didn't change, though, and hence I'm still wondering where "known" comes
from.

> +/* List of known GNU subsections.
> +   Note: this list needs to be sorted.  */
> +static const known_subsection_v2 obj_attr_v2_known_gnu_subsections[] =
> +{
> +  {
> +    /* Note: the currently set values for the subsection name, its optionality,
> +       and encoding are irrelevant for a testing subsection.  These values are
> +       unused.  This entry is only a placeholder for list of known GNU testing
> +       tags.  */
> +    .subsec_name = NULL,

When using dedicated initializers, NULL and 0 (and false) initializers can be
omitted, and imo in most cases should be.

> +/* Search for a subsection matching NAME in the list of subsections known from
> +   bfd (generic or backend-specific).  Return the subsection information if it
> +   is found, or NULL otherwise.  */
> +const known_subsection_v2 *
> +obj_attr_v2_identify_subsection (const struct elf_backend_data *bed,
> +				 const char *name)
> +{
> +  /* Check known backend subsections.  */
> +  const known_subsection_v2 *known_subsections
> +    = bed->obj_attr_v2_known_subsections;
> +  const size_t known_subsections_size = bed->obj_attr_v2_known_subsections_size;
> +
> +  for (unsigned i = 0; i < known_subsections_size; ++i)
> +    {
> +      int cmp = strcmp (known_subsections[i].subsec_name, name);
> +      if (cmp == 0)
> +	return &known_subsections[i];
> +      else if (cmp > 0)

Another such "else" (and more further down).

> +/* To-string function for the pair <SUBSEC, TAG>.
> +   Returns the attribute information associated to TAG if it is found,
> +   or "Tag_unknown_<N>" otherwise.  */
> +const char *
> +obj_attr_v2_tag_to_string (const struct elf_backend_data *bed,
> +			   const char *subsec_name,
> +			   obj_attr_tag_t tag)
> +{
> +  const obj_attr_info_t *attr_info
> +    = obj_attr_v2_find_known_by_tag (bed, subsec_name, tag);
> +  if (attr_info != NULL)
> +    return attr_info->tag.name;
> +
> +  /* 23 because "Tag_unknown_" + max(uint32_t), i.e 4294967295 + '\0'.  */
> +  static char tag_s[23] = {'\0'};

The initializer is not only pointless, but potentially misleading: On the
2nd and later invocations that's not what the variable holds upon entry
into the function.

There's also a more general problem here: Elsewhere iirc there's effort
to make libbfd a proper library, i.e. usable by multiple threads. This is
a step backwards in that regard.

> --- a/bfd/elf-bfd.h
> +++ b/bfd/elf-bfd.h
> @@ -1656,6 +1656,12 @@ struct elf_backend_data
>    /* Encode the object attributes version into the output object.  */
>    uint8_t (*obj_attrs_version_enc) (obj_attr_version_t);
>  
> +  /* The known subsections and attributes (v2 only).  */
> +  const known_subsection_v2 *obj_attr_v2_known_subsections;
> +
> +  /* The size of the array of known subsections.  */
> +  const size_t obj_attr_v2_known_subsections_size;
> +
>    /* This function determines the order in which any attributes are
>       written.  It must be defined for input in the range
>       LEAST_KNOWN_OBJ_ATTRIBUTE..NUM_KNOWN_OBJ_ATTRIBUTES-1 (this range
> @@ -3092,6 +3098,7 @@ extern obj_attr_version_t _bfd_obj_attrs_version_dec (uint8_t);
>  extern uint8_t _bfd_obj_attrs_version_enc (obj_attr_version_t);
>  extern bfd_vma bfd_elf_obj_attr_size (bfd *);
>  extern void bfd_elf_set_obj_attr_contents (bfd *, bfd_byte *, bfd_vma);
> +extern obj_attribute *elf_new_obj_attr (bfd *, obj_attr_vendor_t, obj_attr_tag_t);
>  extern int bfd_elf_get_obj_attr_int (bfd *, obj_attr_vendor_t, obj_attr_tag_t);
>  extern obj_attribute *bfd_elf_add_obj_attr_int
>    (bfd *, obj_attr_vendor_t, obj_attr_tag_t, unsigned int);

When all adjacent functions have a bfd_ (or _bfd_) prefix, why would the new one
not have one as well?

> --- a/bfd/elfxx-aarch64.c
> +++ b/bfd/elfxx-aarch64.c
> @@ -21,8 +21,10 @@
>  #include "sysdep.h"
>  #include "bfd.h"
>  #include "elf-bfd.h"
> +#include "elf/aarch64.h"
>  #include "elfxx-aarch64.h"
>  #include "libbfd.h"
> +#include "libiberty.h"
>  #include <stdarg.h>
>  #include <string.h>
>  
> @@ -887,6 +889,63 @@ _bfd_aarch64_obj_attrs_version_enc (obj_attr_version_t version)
>    abort ();
>  }
>  
> +/* List of known attributes in the subsection "aeabi_feature_and_bits".
> +   Note: the array below needs to be sorted.  */
> +static const obj_attr_info_t known_attrs_aeabi_feature_and_bits[] =
> +{
> +  {
> +    .tag = {"Tag_Feature_BTI", Tag_Feature_BTI},
> +    .default_value.uint_val = 0,
> +  },
> +  {
> +    .tag = {"Tag_Feature_PAC", Tag_Feature_PAC},
> +    .default_value.uint_val = 0,
> +  },
> +  {
> +    .tag = {"Tag_Feature_GCS", Tag_Feature_GCS},
> +    .default_value.uint_val = 0,
> +  },
> +};
> +
> +/* List of known attributes in the subsection "aeabi_pauthabi".
> +   Notes:
> +   - "aeabi_pauthabi" is a required subsection to use PAuthABI (which is
> +     today only supported by LLVM, unsupported by GCC 15 and lower. There is no
> +     plan to add support for it in the future). A value of 0 for any the tags
> +     below means that the user did not permit this entity to use the PAuthABI.
> +   - the array below needs to be sorted.  */
> +static const obj_attr_info_t known_attrs_aeabi_pauthabi[] =
> +{
> +  {
> +    .tag = {"Tag_PAuth_Platform", Tag_PAuth_Platform},
> +    .default_value.uint_val = 0,
> +  },
> +  {
> +    .tag = {"Tag_PAuth_Schema", Tag_PAuth_Schema},
> +    .default_value.uint_val = 0,
> +  },
> +};
> +
> +/* List of known subsections.
> +   Note: this array is exported by the backend, and needs to be sorted.  */

Sorted by what criteria?

> --- a/bfd/elfxx-target.h
> +++ b/bfd/elfxx-target.h
> @@ -563,6 +563,12 @@
>  #ifndef elf_backend_obj_attrs_version_enc
>  #define elf_backend_obj_attrs_version_enc	_bfd_obj_attrs_version_enc
>  #endif
> +#ifndef	elf_backend_obj_attr_v2_known_subsections
> +#define elf_backend_obj_attr_v2_known_subsections	NULL
> +#endif
> +#ifndef	elf_backend_obj_attr_v2_known_subsections_size
> +#define elf_backend_obj_attr_v2_known_subsections_size	0
> +#endif
>  #ifndef elf_backend_obj_attrs_order
>  #define elf_backend_obj_attrs_order		NULL
>  #endif

While you use tabs, adjacent entries use blanks after #ifndef.

> --- a/gas/config/obj-elf-attr.c
> +++ b/gas/config/obj-elf-attr.c
> @@ -22,8 +22,70 @@
>  
>  #ifdef TC_OBJ_ATTR
>  
> +#include "obstack.h"
>  #include "safe-ctype.h"
>  
> +/* A variant type to store information about known OAv2 identifiers.  */
> +typedef union {
> +  uint8_t u8;
> +  bool b;
> +} oav2_identifier_variant_value;
> +
> +typedef enum {
> +  OAv2_ASM_ID_VALUE_UNDEFINED = 0,
> +  OAv2_ASM_ID_VALUE_U8,
> +  OAv2_ASM_ID_VALUE_BOOL,
> +} oav2_identifier_variant_type_info;
> +
> +typedef struct {
> +  oav2_identifier_variant_value val;
> +  oav2_identifier_variant_type_info vtype;
> +} oav2_identifier_variant_t;
> +
> +typedef struct {
> +  const char *const name;
> +  const oav2_identifier_variant_t value;
> +} oav2_identifier_t;
> +
> +/* A variant type to store the argument values of an assembly directive.  */
> +struct arg_variant_t;
> +
> +typedef struct {
> +  size_t len;
> +  struct arg_variant_t *elts;
> +} arg_variant_list;
> +
> +typedef union {
> +  const char *string;
> +#if (TC_OBJ_ATTR_v1)
> +  uint32_t u32;
> +#endif

How does v1 come into play here?

> +  uint64_t u64;
> +  int64_t i64;
> +  arg_variant_list list;
> +} arg_variant_value;
> +
> +typedef enum {
> +  VALUE_UNDEFINED = 0,
> +#if (TC_OBJ_ATTR_v1)
> +  VALUE_U32,
> +#endif
> +  VALUE_U64,
> +  VALUE_I64,
> +  VALUE_UNSIGNED_INTEGER = VALUE_U64,
> +  VALUE_SIGNED_INTEGER = VALUE_I64,
> +  VALUE_STRING,
> +  VALUE_LIST,
> +  VALUE_OPTIONAL_ABSENT,
> +} arg_variant_type_info;
> +
> +typedef struct arg_variant_t {
> +  arg_variant_value val;
> +  arg_variant_type_info vtype;
> +} arg_variant_t;
> +
> +typedef arg_variant_t arg_t;
> +
>  #define skip_whitespace(str)  do { if (is_whitespace (*(str))) ++(str); } while (0)
>  
>  static inline bool
> @@ -38,6 +100,8 @@ skip_past_char (char **str, char c)
>  }
>  #define skip_past_comma(str) skip_past_char (str, ',')
>  
> +#if (TC_OBJ_ATTR_v1)
> +
>  /* A list of attributes that have been explicitly set by the assembly code.
>     VENDOR is the vendor id, BASE is the tag shifted right by the number
>     of bits in MASK, and bit N of MASK is set if tag BASE+N has been set.  */
> @@ -120,6 +184,997 @@ oav1_attr_seen (obj_attr_vendor_t vendor, obj_attr_tag_t tag)
>    return false;
>  }
>  
> +#endif /* TC_OBJ_ATTR_v1 */
> +
> +/* Expected argument tokens for object attribute directives.  */
> +typedef enum {
> +  /* Base types.  */
> +  IDENTIFIER = 0x1,
> +  UNSIGNED_INTEGER = 0x2,
> +  SIGNED_INTEGER = 0x4,
> +  STRING = 0x8,
> +  LIST = 0x10,
> +  LT_MASK = 0xFF,
> +  /* Higher types.  */
> +  SUBSECTION_NAME = 0x100,
> +  SUBSECTION_OPTION_1 = 0x200,
> +  SUBSECTION_OPTION_2 = 0x400,
> +  ATTRIBUTE_KEY = 0x800,
> +  ATTRIBUTE_VALUE = 0x1000,
> +  HT_MASK = 0xFF00,
> +} arg_token_t;
> +
> +/* Free an arguments list of size N.  */
> +static void
> +args_list_free (arg_t *args, size_t n)
> +{
> +  for (size_t i = 0; i < n; ++i)
> +    if (args[i].vtype == VALUE_STRING)
> +      free ((void *) args[i].val.string);
> +    else if (args[i].vtype == VALUE_LIST)
> +      args_list_free (args[i].val.list.elts, args[i].val.list.len);
> +  free (args);
> +}
> +
> +/* Extract a string literal ("[^.]+") from the input.  */
> +static bool
> +extract_string_literal (arg_t *arg_out)
> +{
> +  skip_whitespace (input_line_pointer);
> +
> +  if (*input_line_pointer != '"')
> +    {
> +      as_bad ("missing '\"', expected a string literal");
> +      return false;
> +    }
> +
> +  int len;
> +  char *obstack_buf = demand_copy_C_string (&len);

The function already skips whitespace and checks for a starting '"', so
why open-code that here again?

> +  if (obstack_buf != NULL)
> +    {
> +      arg_out->val.string = xstrdup (obstack_buf);
> +      obstack_free (&notes, obstack_buf);
> +      arg_out->vtype = VALUE_STRING;
> +      return true;
> +    }
> +
> +  arg_out->val.string = NULL;
> +  return false;
> +}
> +
> +/* Extract an integer literal from the input.
> +   Anything matched by O_constant is considered an integer literal (see the
> +   usage of O_constant in expr.c to see all the matches.  */
> +static bool
> +extract_integer_literal (arg_t *arg_out,
> +			 bool want_unsigned)
> +{
> +  expressionS exp;
> +  expression_and_evaluate (&exp);
> +  if (exp.X_op != O_constant)
> +    {
> +      as_bad (_("invalid value, expected an integer literal"));
> +      goto bad;
> +    }
> +
> +  int64_t val = (int64_t) exp.X_add_number;
> +  if (want_unsigned)
> +    {
> +      if (! exp.X_unsigned && val < 0)
> +	{
> +	  as_bad (_("invalid negative value %" PRId64 ", expected an unsigned "
> +		    "integer"), val);
> +	  goto bad;
> +	}
> +      arg_out->val.u64 = val;
> +      arg_out->vtype = VALUE_UNSIGNED_INTEGER;
> +    }
> +  else
> +    {
> +      arg_out->val.i64 = val;
> +      arg_out->vtype = VALUE_SIGNED_INTEGER;
> +    }
> +  return true;
> +
> + bad:
> +  ignore_rest_of_line ();
> +  return false;
> +}
> +
> +/* Extract an identifier based on the provided character matcher.  */
> +static bool
> +extract_identifier (bool (*char_predicate) (char), arg_t *arg_out)
> +{
> +  const char *s = input_line_pointer;
> +  unsigned int i = 0;
> +  for (; char_predicate (*input_line_pointer); ++input_line_pointer)
> +    i++;
> +  if (i == 0)
> +    {
> +      as_bad (_("invalid value '%c', expected an identifier"),
> +	      *input_line_pointer);
> +      ignore_rest_of_line ();
> +      return false;
> +    }
> +
> +  arg_out->vtype = VALUE_STRING;
> +  arg_out->val.string = xmemdup0 (s, i);
> +  return true;
> +}
> +
> +#if (TC_OBJ_ATTR_v2)
> +/* Resolve the identifier if it matches a known tag.  */
> +static bool
> +resolve_if_matching_known_tag (const char *identifier,
> +			       const obj_attr_tag_info_t *known_identifier,
> +			       arg_t *val_out)
> +{
> +  if (strcmp (known_identifier->name, identifier) != 0)
> +    return false;
> +
> +  /* Free the identifier since we found the value.  */
> +  free ((void *) val_out->val.string);

The identifier you found is pointed to by "identifier", not
val_out->val.string. What's the (hidden) relation between the two that
justfies this freeing?

> +  val_out->val.u64 = known_identifier->value;
> +  val_out->vtype = VALUE_UNSIGNED_INTEGER;
> +  return true;
> +}
> +
> +/* Resolve the identifier if it matches the given symbol.  */
> +static bool
> +resolve_if_matching (const char *identifier,
> +		     const oav2_identifier_t *known_identifier,
> +		     arg_t *val_out)
> +{
> +  if (strcmp (known_identifier->name, identifier) != 0)
> +    return false;
> +
> +  /* Free the identifier since we found the value.  */
> +  free ((void *) val_out->val.string);

Same question here, obviously.

> +  switch (known_identifier->value.vtype)
> +    {
> +    case OAv2_ASM_ID_VALUE_U8:
> +      val_out->val.u64 = known_identifier->value.val.b;
> +      val_out->vtype = VALUE_UNSIGNED_INTEGER;
> +      break;
> +    case OAv2_ASM_ID_VALUE_BOOL:
> +      val_out->val.u64 = known_identifier->value.val.u8;
> +      val_out->vtype = VALUE_UNSIGNED_INTEGER;
> +      break;
> +    default:
> +      abort ();
> +    }

How come bool and u8 are all that need handling? Surely the reader could be
helped by having a comment tio this effect?

> +  return true;
> +}
> +#endif /* TC_OBJ_ATTR_v2 */
> +
> +#if (TC_OBJ_ATTR_v1)
> +/* Look up attribute tags defined in the backend (object attribute v1).  */
> +static bool
> +obj_attr_v1_lookup_known_attr_tag_symbol (const char *identifier,
> +					  arg_token_t token_type,
> +					  arg_t *val_out)
> +{
> +#ifndef CONVERT_SYMBOLIC_ATTRIBUTE
> +#define CONVERT_SYMBOLIC_ATTRIBUTE(a) -1
> +  (void) identifier;

As indicated in v8 review - the common way is use of ATTRIBUTE_UNUSED.

> +#endif
> +
> +  gas_assert (token_type & UNSIGNED_INTEGER);
> +
> +  int tag = CONVERT_SYMBOLIC_ATTRIBUTE (identifier);
> +  if (tag < 0)
> +    return false;
> +  val_out->val.u64 = tag;
> +  val_out->vtype = VALUE_UNSIGNED_INTEGER;
> +  return true;
> +}
> +#endif /* TC_OBJ_ATTR_v1 */
> +
> +#if (TC_OBJ_ATTR_v2)
> +/* Look up attribute tags defined in the backend (object attribute v2).  */
> +static bool
> +obj_attr_v2_lookup_known_attr_tag_symbol (const char *identifier,
> +					  arg_token_t token_type,
> +					  arg_t *val_out)
> +{
> +  obj_attr_subsection_v2 *subsec = elf_obj_attr_subsections (stdoutput).last;
> +  /* If there is no current subsection, this function was called wrongly before
> +     setting one (usually via the subsection directive).  */
> +  gas_assert (subsec != NULL);
> +
> +  /* An attribute tag is an unsigned integer, so the expected token type should
> +     always have the base type UNSIGNED_INTEGER.  Otherwise, this function was
> +     called incorrectly.  */
> +  gas_assert (token_type & UNSIGNED_INTEGER);
> +
> +  bool resolved = false;
> +  const struct elf_backend_data *be = get_elf_backend_data (stdoutput);
> +  const known_subsection_v2 *known_subsec
> +    = obj_attr_v2_identify_subsection (be, subsec->name);
> +  if (known_subsec != NULL)
> +    {
> +      for (size_t i = 0; i < known_subsec->len && ! resolved; ++i)
> +	resolved
> +	  = resolve_if_matching_known_tag (identifier,
> +					   &known_subsec->known_attrs[i].tag,
> +					   val_out);
> +    }
> +
> +  if (resolved)
> +    /* An attribute tag is an unsigned integer, so the type of the found value
> +       should be VALUE_UNSIGNED_INTEGER.  Otherwise, check if you set correctly
> +       the type of the value associated to the symbol.  */
> +    gas_assert (val_out->vtype == VALUE_UNSIGNED_INTEGER);
> +
> +  return resolved;
> +}
> +#endif /* TC_OBJ_ATTR_v2 */
> +
> +/* Look up known symbols, and try to resolve the given identifier.  */
> +static bool
> +lookup_known_symbols (const char *identifier,
> +		      arg_token_t token_type,
> +		      arg_t *val_out)
> +{
> +  if (identifier == NULL)
> +    return false;
> +
> +  /* The identifier should match the value in val_out.  */
> +  gas_assert (val_out->val.string == identifier);
> +
> +  arg_token_t high_ttype = (token_type & HT_MASK);
> +
> +#if (TC_OBJ_ATTR_v2)
> +  static const oav2_identifier_t known_identifiers_subsection_optional[] = {
> +    { "optional", {
> +	.val.b = true,
> +	.vtype = OAv2_ASM_ID_VALUE_BOOL
> +      }
> +    },
> +    { "required", {
> +	.val.b = false,
> +	.vtype = OAv2_ASM_ID_VALUE_BOOL
> +      }
> +    },
> +  };

Interesting - the "keywords" a user would specify need to be all lowercase here,
but ...

> +  static const oav2_identifier_t known_identifiers_subsection_encoding[] = {
> +    { "ULEB128", {
> +	.val.u8 = obj_attr_encoding_v2_to_u8 (OA_ENC_ULEB128),
> +	.vtype = OAv2_ASM_ID_VALUE_U8
> +      }
> +    },
> +    { "uleb128", {
> +	.val.u8 = obj_attr_encoding_v2_to_u8 (OA_ENC_ULEB128),
> +	.vtype = OAv2_ASM_ID_VALUE_U8
> +      }
> +    },
> +    { "NTBS", {
> +	.val.u8 = obj_attr_encoding_v2_to_u8 (OA_ENC_NTBS),
> +	.vtype = OAv2_ASM_ID_VALUE_U8
> +      }
> +    },
> +    { "ntbs", {
> +	.val.u8 = obj_attr_encoding_v2_to_u8 (OA_ENC_NTBS),
> +	.vtype = OAv2_ASM_ID_VALUE_U8
> +      }
> +    },
> +  };

... can be all low or all upper case here (but then not e.g. Uleb128). Wouldn't
case-insensitive comparison be preferable, leaving more freedom to users?

> +#endif /* TC_OBJ_ATTR_v2 */
> +
> +  bool resolved = false;
> +
> +#if (TC_OBJ_ATTR_v2)
> +  if (high_ttype == SUBSECTION_OPTION_1 || high_ttype == SUBSECTION_OPTION_2)
> +    {
> +      const oav2_identifier_t *known_identifiers
> +	= (high_ttype == SUBSECTION_OPTION_1
> +	   ? known_identifiers_subsection_optional
> +	   : known_identifiers_subsection_encoding);
> +      const size_t N_identifiers
> +	= (high_ttype == SUBSECTION_OPTION_1
> +	   ? ARRAY_SIZE (known_identifiers_subsection_optional)
> +	   : ARRAY_SIZE (known_identifiers_subsection_encoding));
> +
> +      for (size_t i = 0; i < N_identifiers && ! resolved; ++i)
> +	resolved = resolve_if_matching (identifier,
> +					&known_identifiers[i],
> +					val_out);
> +    }
> +  else
> +#endif /* TC_OBJ_ATTR_v2 */
> +  if (high_ttype == ATTRIBUTE_KEY)
> +    {
> +      obj_attr_version_t version = elf_obj_attr_version (stdoutput);
> +#if (TC_OBJ_ATTR_v1)
> +      if (version == OBJ_ATTR_V1)
> +	resolved = obj_attr_v1_lookup_known_attr_tag_symbol (identifier,
> +	  token_type, val_out);
> +#endif /* TC_OBJ_ATTR_v1 */
> +#if (TC_OBJ_ATTR_v2)
> +  #if (TC_OBJ_ATTR_v1)
> +      else
> +  #endif /* TC_OBJ_ATTR_v1 */
> +      if (version == OBJ_ATTR_V2)
> +	resolved = obj_attr_v2_lookup_known_attr_tag_symbol (identifier,
> +	  token_type, val_out);
> +#endif /* TC_OBJ_ATTR_v2 */
> +      else
> +	abort ();
> +    }
> +  else
> +    abort ();
> +
> +  return resolved;
> +}
> +
> +/* Look up the symbol table of this compilation unit, and try to resolve the
> +   given identifier.  */
> +static bool
> +lookup_symbol_table (const char *identifier,
> +		     const arg_token_t expected_ttype,
> +		     arg_t *val_out)
> +{
> +  if (identifier == NULL)
> +    return false;
> +
> +  /* Note: signed integer are unsupported for now.  */
> +  gas_assert (expected_ttype & UNSIGNED_INTEGER);
> +  /* The identifier should match the value in val_out.  */
> +  gas_assert (val_out->val.string == identifier);

Hmm, maybe herein lies part of the answer to an earlier question. Yet then why
would those functions take redundant parameters?

> +  symbolS *symbolP = symbol_find (identifier);
> +  if (symbolP == NULL)
> +    return false;
> +
> +  if (! S_IS_DEFINED (symbolP))
> +    return false;
> +
> +  valueT val = S_GET_VALUE (symbolP);
> +
> +  /* Free the identifier since we found the value.  */
> +  free ((void *) val_out->val.string);
> +
> +  val_out->val.u64 = val;
> +  val_out->vtype = VALUE_UNSIGNED_INTEGER;
> +
> +  return true;
> +}
> +
> +/* Return true if the next characters are suspected to represent an integer
> +   literal.  */
> +static bool
> +look_like_integer_literal (const char *cursor)
> +{
> +  if (ISDIGIT (*cursor))
> +    return true;
> +  if (! (*cursor == '+' || *cursor == '-' || *cursor == '~'))
> +    return false;
> +  ++cursor;
> +  while (ISSPACE (*cursor))
> +    ++cursor;
> +  return ISDIGIT (*cursor);
> +}

Hmm, now that I look at the result, maybe I misled you. An "integer literal"
would perhaps indeed not allow for ~ and also not for a blank between sign
and first digit. Yet then, as before, I'm having trouble seeing why proper
expressions (involving perhaps more than just integer literals, e.g. also
parentheses) shouldn't be acceptable.

> +/* Skip white spaces + parameter separator after a parameter.
> +   Error if it does not meet a parameter separator after a parameter.  */
> +static bool
> +skip_whitespaces_past_comma (unsigned int n)
> +{
> +  skip_whitespace (input_line_pointer);
> +  if (! skip_past_comma (&input_line_pointer))
> +    {
> +      if (n == 0)
> +	as_bad (_("unexpected comma after value"));
> +      else
> +	as_bad (_("missing comma after parameter %d"), n);

Nit: %u please for unsigned int values.

> +      return false;
> +    }
> +  return true;
> +}
> +
> +/* Can parse a list of arguments with variable length.  */
> +static bool
> +obj_attr_parse_args (arg_token_t expected_ttype,
> +		     bool (*match_identifier) (char c),
> +		     bool resolve_identifier,
> +		     arg_t *arg_out)
> +{
> +  if ((expected_ttype & LIST) == 0)
> +    return obj_attr_parse_arg (expected_ttype, match_identifier,
> +      resolve_identifier, false, arg_out);
> +
> +  static const size_t LIST_MAX_SIZE = 2;
> +  arg_t *arg_list = xcalloc (LIST_MAX_SIZE, sizeof (arg_t));

As said in reply to at least one of the v8 patches - please avoid
sizeof(<type>) when a specific variable or expression is what you're after
(here: *arg_list).

> +
> +  /* We don't want to support recursive lists.  */
> +  expected_ttype &= ~LIST;
> +
> +  size_t n = 0;
> +  do {
> +    if (! trim_whitespaces_before_param (0))
> +      goto bad;
> +
> +    if (! obj_attr_parse_arg (expected_ttype, match_identifier,
> +			      resolve_identifier, false, &arg_list[n]))
> +      goto bad;
> +
> +    ++n;
> +    skip_whitespace (input_line_pointer);
> +    if (is_end_of_stmt (*input_line_pointer))
> +      break;
> +
> +    if (! skip_whitespaces_past_comma (0))
> +      goto bad;
> +
> +    if (n >= LIST_MAX_SIZE)
> +      {
> +	as_bad ("too many arguments for a list (max: %lu)", LIST_MAX_SIZE);
> +	goto bad;
> +      }
> +  } while (n < LIST_MAX_SIZE);
> +
> +  arg_out->vtype = VALUE_LIST;
> +  arg_out->val.list.len = n;
> +  arg_out->val.list.elts = arg_list;
> +  return true;
> +
> + bad:
> +  args_list_free (arg_list, n);
> +  return false;
> +}
> +
> +#if (TC_OBJ_ATTR_v2)
> +static bool
> +is_valid_boolean (uint64_t value)
> +{
> +  return value == 0 || value == 1;
> +}
> +
> +#define is_valid_comprehension is_valid_boolean
> +
> +static bool
> +is_valid_encoding (uint64_t value)
> +{
> +  value = obj_attr_encoding_v2_from_u8 (value);
> +  return OA_ENC_UNSET < value && value <= OA_ENC_MAX;
> +}
> +
> +static bool
> +match_subsection_identifier (char c)
> +{
> +  return ISALNUM (c) || c == '_' || c == '-';
> +}

Why "match" and not "is", like others above are?

> +#endif /* TC_OBJ_ATTR_v2 */
> +
> +static bool
> +match_symbol (char c)
> +{
> +  return ISALNUM (c) || c == '_';
> +}

Same here then, obviously.

> --- a/gas/config/obj-elf-attr.h
> +++ b/gas/config/obj-elf-attr.h
> @@ -23,12 +23,36 @@
>  
>  #include "as.h"
>  
> +#ifndef TC_OBJ_ATTR_V1
> +#define TC_OBJ_ATTR_V1 0
> +#endif
> +#ifndef TC_OBJ_ATTR_V2
> +#define TC_OBJ_ATTR_V2 0
> +#endif
> +
> +#if (TC_OBJ_ATTR_v1 || TC_OBJ_ATTR_v2)
> +  #define TC_OBJ_ATTR 1
> +#endif

Given this, ...

>  #ifdef TC_OBJ_ATTR
>  
> +#if (TC_OBJ_ATTR_v1)
>  extern void oav1_attr_info_init (void);
>  extern void oav1_attr_info_exit (void);
>  extern bool oav1_attr_seen (obj_attr_vendor_t, obj_attr_tag_t);
> +#endif /* TC_OBJ_ATTR_v1 */
> +
> +/* Object attributes parsers.  */
> +
> +#if (TC_OBJ_ATTR_v1)
>  extern obj_attr_tag_t obj_attr_v1_process_attribute (obj_attr_vendor_t);
> +#endif /* (TC_OBJ_ATTR_v1) */
> +#if (TC_OBJ_ATTR_v1 || TC_OBJ_ATTR_v2)
> +extern obj_attr_tag_t obj_attr_process_attribute (obj_attr_vendor_t);
> +#endif /* (TC_OBJ_ATTR_v1 || TC_OBJ_ATTR_v2) */

... is the #if really needed here? Or ...

> +#if (TC_OBJ_ATTR_v2)
> +extern void obj_attr_process_subsection (void);
> +#endif /* (TC_OBJ_ATTR_v2) */
>  
>  #endif /* TC_OBJ_ATTR */
>  
> --- a/gas/config/obj-elf.c
> +++ b/gas/config/obj-elf.c
> @@ -72,7 +72,9 @@ static void obj_elf_symver (int);
>  static void obj_elf_subsection (int);
>  static void obj_elf_popsection (int);
>  #ifdef TC_OBJ_ATTR
> +#if (TC_OBJ_ATTR_v1 || TC_OBJ_ATTR_v2)
>  static void obj_elf_gnu_attribute (int);
> +#endif /* (TC_OBJ_ATTR_v1 || TC_OBJ_ATTR_v2) */
>  #endif /* TC_OBJ_ATTR */

... here?

> @@ -119,7 +121,9 @@ static const pseudo_typeS elf_pseudo_table[] =
>  
>    /* A GNU extension for object attributes.  */
>  #ifdef TC_OBJ_ATTR
> +#if (TC_OBJ_ATTR_v1 || TC_OBJ_ATTR_v2)
>    {"gnu_attribute", obj_elf_gnu_attribute, 0},
> +#endif /* (TC_OBJ_ATTR_v1 || TC_OBJ_ATTR_v2) */
>  #endif /* TC_OBJ_ATTR */

Same question here.

> @@ -3044,9 +3057,9 @@ elf_end (void)
>        free (groups.head);
>      }
>  
> -#ifdef TC_OBJ_ATTR
> +#if defined (TC_OBJ_ATTR) && TC_OBJ_ATTR_v1
>    oav1_attr_info_exit ();
> -#endif /* TC_OBJ_ATTR */
> +#endif /* defined (TC_OBJ_ATTR) && TC_OBJ_ATTR_v1 */
>  }

And isn't a TC_OBJ_ATTR_v1 check alone sufficient here?

Jan


More information about the Binutils mailing list