Add support for orc format by MehulBatra · Pull Request #790 · apache/iceberg-python

Hi @Fokko and @HonahX
✅ I have modified the read logic to read ORC file-based Iceberg tables and wrote an integration test as well; it is working great.

Would love some guidance on:

I was able to find a way to create an ORC file-based Iceberg table via the Glue client (by passing the properties with format=orc).

But this still produces Parquet data files when I append data. (Is it because the datafile and deletefile logic takes the Parquet file format by default?)

from pyiceberg.catalog import load_catalog
from decimal import Decimal
import pyarrow as pa

# Reproduction script: create an Iceberg table with an ORC format property
# and append a small Arrow table to it.
catalog = load_catalog("default")  # my default catalog is glue
namespace = 'demo_ns'
table_name = 'test_table_dummy_orc_demo'

# Two sample rows matching the Arrow schema declared below.
pylist = [
    {'decimal_col': Decimal('32768.1'), 'int_col': 1, 'string_col': "demo_one"},
    {'decimal_col': Decimal('44456.1'), 'int_col': 2, 'string_col': "demo_two"},
]
arrow_schema = pa.schema(
    [
        pa.field('decimal_col', pa.decimal128(33, 1)),
        pa.field('int_col', pa.int32()),
        pa.field('string_col', pa.string()),
    ],
)
arrow_table = pa.Table.from_pylist(pylist, schema=arrow_schema)

# NOTE(review): Iceberg's documented table property for the default data file
# format is `write.format.default`; a bare `format` key may not be consulted
# by the writer, which could explain the Parquet output -- confirm.
new_table = catalog.create_table(
    identifier=f'{namespace}.{table_name}',
    schema=arrow_schema,
    properties={
        'format': 'orc',
    },
)  # closing parenthesis was missing in the original snippet

# Append to the table that was just created; the original referenced an
# undefined name `table` here.
new_table.append(arrow_table)