GitHub - kishyassin/goframe: goframe is a Go package inspired by Python's pandas, designed for data manipulation and analysis.

goframe is a Go package inspired by Python's pandas, designed for data manipulation and analysis. It provides a DataFrame structure and Column types for handling and processing structured data efficiently.

Go Reference

Features

  • Typed columns with support for int, float64, string, and bool.
  • DataFrame operations such as adding/removing columns, filtering rows, and selecting subsets.
  • Auto-detection of column types during CSV import.
  • Statistical aggregations like Mean, Sum, Min, and Max.
  • Join operations: Perform inner, left, right, and outer joins between DataFrames.
  • Row operations: Access rows (Row), retrieve subsets (Head, Tail), append rows (AppendRow), and remove rows (DropRow).
  • Multiple Column Selection: Select multiple columns using the MultiSelect method.
  • Column renaming: Rename columns using the RenameColumn method.
  • CSV export: Save DataFrames to CSV files using ToCSV and ToCSVWriter.
  • Time Series Support: Add datetime indexing, resampling, and shifting for time series data.
  • Visualization: Generate line and bar plots directly from DataFrames.

Installation

To install goframe, use:

go get github.com/kishyassin/goframe

Usage

Creating a DataFrame

package main

import (
	"fmt"
	"github.com/kishyassin/goframe"
)

// main builds a two-column DataFrame (names and ages) and prints it.
func main() {
	// Build the typed columns up front; each is wrapped for the DataFrame below.
	names := goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"})
	ages := goframe.NewColumn("age", []int{25, 30, 35})

	people := goframe.NewDataFrame()
	people.AddColumn(goframe.ConvertToAnyColumn(names))
	people.AddColumn(goframe.ConvertToAnyColumn(ages))

	fmt.Println(people)
}

Importing from CSV

package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/kishyassin/goframe"
)

// main parses an in-memory CSV document into a DataFrame and prints it.
func main() {
	// The first line is the header row; the remaining lines are records.
	const csvData = `name,age,salary
Alice,25,50000
Bob,30,60000
Charlie,35,70000`

	df, err := goframe.FromCSVReader(strings.NewReader(csvData))
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(df)
}

Joining DataFrames

package main

import (
	"fmt"
	"log"

	"github.com/kishyassin/goframe"
)

// main inner-joins two DataFrames on their shared "id" column and prints
// the joined result.
func main() {
	const joinKey = "id"

	// Left-hand side: ids 1..3 with a "value1" column.
	left := goframe.NewDataFrame()
	leftIDs := goframe.NewColumn("id", []int{1, 2, 3})
	leftValues := goframe.NewColumn("value1", []string{"A", "B", "C"})
	left.AddColumn(goframe.ConvertToAnyColumn(leftIDs))
	left.AddColumn(goframe.ConvertToAnyColumn(leftValues))

	// Right-hand side: ids 2..4 with a "value2" column.
	right := goframe.NewDataFrame()
	rightIDs := goframe.NewColumn("id", []int{2, 3, 4})
	rightValues := goframe.NewColumn("value2", []string{"X", "Y", "Z"})
	right.AddColumn(goframe.ConvertToAnyColumn(rightIDs))
	right.AddColumn(goframe.ConvertToAnyColumn(rightValues))

	joined, err := left.InnerJoin(right, joinKey)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(joined)
}

Grouping by multiple columns (Groupby list of strings)

package main

import (
	"fmt"
	"log"

	"github.com/kishyassin/goframe"
)

// main groups a DataFrame by the "city" and "category" columns and prints
// the per-group sum of the "value" column.
func main() {
	sales := goframe.NewDataFrame()
	sales.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("city", []string{"A", "A", "B"})))
	sales.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("category", []string{"x", "y", "x"})))
	sales.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("value", []int{10, 20, 30})))

	// The grouping keys are passed as a list of column names.
	groupKeys := []string{"city", "category"}
	grouped := sales.Groupby(groupKeys)
	if err := grouped.Error(); err != nil {
		log.Fatal(err)
	}

	// Sum the "value" column within each group.
	totals, err := grouped.Sum("value")
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println("Grouped and summed:")
	fmt.Println(totals)
}

Adding two DataFrames (DataFrame.Add)

package main

import (
	"fmt"
    "log"

    "github.com/kishyassin/goframe"
)

// main builds two DataFrames with matching "a" and "b" integer columns,
// adds them with DataFrame.Add, and prints the result.
func main() {
	df1 := goframe.NewDataFrame()
	df1.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("a", []int{1, 2, 3})))
	df1.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("b", []int{10, 20, 30})))

	df2 := goframe.NewDataFrame()
	df2.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("a", []int{4, 5, 6})))
	df2.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("b", []int{40, 50, 60})))

	// Add the two dataframes element-wise (numeric columns are summed)
	summed, err := df1.Add(df2)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println("Summed DataFrame:")
	fmt.Println(summed)
}

Row Operations

package main

import (
	"fmt"
	"github.com/kishyassin/goframe"
)

// main demonstrates the row-level operations: reading a single row, taking
// the first rows with Head, appending a row, and dropping a row.
func main() {
	df := goframe.NewDataFrame()
	df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"})))
	df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("age", []int{25, 30, 35})))

	// Access a row. The error is checked rather than discarded so a bad
	// index is reported instead of printing an empty row.
	row, err := df.Row(1)
	if err != nil {
		fmt.Println("Error accessing row:", err)
		return
	}
	fmt.Println("Row 1:", row)

	// Get the first two rows
	head := df.Head(2)
	fmt.Println("Head:", head)

	// Append a new row
	df.AppendRow(df, map[string]any{"name": "Diana", "age": 40})
	fmt.Println("After appending a row:", df)

	// Drop a row
	df.DropRow(1)
	fmt.Println("After dropping a row:", df)
}

Multiple Column Selection

package main

import (
	"fmt"
	"log"

	"github.com/kishyassin/goframe"
)

// main selects the "name" and "salary" columns from a DataFrame with
// MultiSelect and prints the resulting DataFrame.
func main() {
	names := goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"})
	salaries := goframe.NewColumn("salary", []int{100, 200, 300})

	df := goframe.NewDataFrame()
	df.AddColumn(goframe.ConvertToAnyColumn(names))
	df.AddColumn(goframe.ConvertToAnyColumn(salaries))

	// MultiSelect takes the wanted column names as variadic arguments.
	selectedDf, err := df.MultiSelect("name", "salary")
	if err != nil {
		log.Fatal(err)
	}

	rendered := selectedDf.String()
	fmt.Println("The DataFrame with selected columns: ", rendered)
}

Renaming Columns

package main

import (
	"fmt"
	"log"

	"github.com/kishyassin/goframe"
)

// main renames the "name" column of a DataFrame to "full_name" and prints
// the DataFrame afterwards.
func main() {
	names := goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"})
	ages := goframe.NewColumn("age", []int{25, 30, 35})

	df := goframe.NewDataFrame()
	df.AddColumn(goframe.ConvertToAnyColumn(names))
	df.AddColumn(goframe.ConvertToAnyColumn(ages))

	// Arguments are the current column name followed by the new one.
	if err := df.RenameColumn("name", "full_name"); err != nil {
		log.Fatal(err)
	}

	fmt.Println("Renamed column:", df)
}

Exporting to CSV

package main

import (
	"fmt"
	"log"

	"github.com/kishyassin/goframe"
)

// main writes a small DataFrame to "output.csv" with ToCSV and reports
// success on standard output.
func main() {
	names := goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"})
	ages := goframe.NewColumn("age", []int{25, 30, 35})

	df := goframe.NewDataFrame()
	df.AddColumn(goframe.ConvertToAnyColumn(names))
	df.AddColumn(goframe.ConvertToAnyColumn(ages))

	// Any export failure terminates the program with the error message.
	if err := df.ToCSV("output.csv"); err != nil {
		log.Fatal(err)
	}

	fmt.Println("DataFrame exported to output.csv")
}

Advanced Features

Time Series

package main

import (
	"fmt"
	"time"
	"github.com/kishyassin/goframe"
)

// main demonstrates time series resampling: it converts a string "date"
// column to time.Time values, then resamples the DataFrame at the "D"
// frequency, summing the float64 values that fall into each bucket.
func main() {
	df := goframe.NewDataFrame()
	df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("date", []string{"2025-09-14", "2025-09-15", "2025-09-16"})))
	df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("value", []float64{10.5, 20.3, 30.7})))

	// Convert the "date" column from string to time.Time
	dateCol, err := df.Select("date")
	if err != nil {
		fmt.Println("Error selecting date column:", err)
		return
	}

	// The final length is known, so allocate the backing array once.
	dateTimes := make([]time.Time, 0, len(dateCol.Data))
	for _, raw := range dateCol.Data {
		// Use the comma-ok form so a non-string cell is reported instead of
		// panicking on a failed type assertion.
		dateStr, ok := raw.(string)
		if !ok {
			fmt.Printf("Error parsing date: unexpected value type %T\n", raw)
			return
		}
		parsedDate, err := time.Parse("2006-01-02", dateStr)
		if err != nil {
			fmt.Println("Error parsing date:", err)
			return
		}
		dateTimes = append(dateTimes, parsedDate)
	}
	df.Columns["date"] = goframe.ConvertToAnyColumn(goframe.NewColumn("date", dateTimes))

	// Resample data to daily frequency
	resampled, err := df.Resample("date", "D", func(values []any) any {
		// Example aggregation function: calculate the sum.
		// Values that are not float64 are skipped.
		sum := 0.0
		for _, v := range values {
			if num, ok := v.(float64); ok {
				sum += num
			}
		}
		return sum
	})
	if err != nil {
		fmt.Println("Error during resampling:", err)
		return
	}

	fmt.Println("Resampled DataFrame:", resampled)
}

Visualization

package main

import (
	"fmt"
	"github.com/kishyassin/goframe"
)

// main plots the "y" column against the "x" column as a line chart and
// saves it to "line_plot.png".
func main() {
	xs := goframe.NewColumn("x", []float64{1, 2, 3})
	ys := goframe.NewColumn("y", []float64{2.5, 3.5, 4.5})

	df := goframe.NewDataFrame()
	df.AddColumn(goframe.ConvertToAnyColumn(xs))
	df.AddColumn(goframe.ConvertToAnyColumn(ys))

	// Arguments: x column name, y column name, output file path.
	if err := df.LinePlot("x", "y", "line_plot.png"); err != nil {
		fmt.Println("Error generating line plot:", err)
		return
	}

	fmt.Println("Line plot saved as 'line_plot.png'")
}

API Reference

DataFrame Methods

  • NewDataFrame(): Create a new DataFrame.
  • AddColumn(column *Column[any]): Add a column to the DataFrame.
  • Row(index int): Retrieve a row by index.
  • InnerJoin(other *DataFrame, key string): Perform inner join operation.
  • OuterJoin(other *DataFrame, key string): Perform outer join operation.
  • LeftJoin(other *DataFrame, key string): Perform left join operation.
  • RightJoin(other *DataFrame, key string): Perform right join operation.
  • Resample(column string, frequency string, aggFunc func([]any) any): Resample time series data using the given aggregation function.
  • LinePlot(xCol, yCol, outputFile string): Generate a line plot.

Column Methods

  • NewColumn(name string, data []T): Create a new column.
  • At(index int): Retrieve a value by index.

Contributing

We welcome contributions from the community! If you'd like to contribute:

  1. Fork the repository.
  2. Create a new branch for your feature or bug fix.
  3. After the implementation/fix, run go test ./goframe_tests and ensure all tests pass.
  4. Submit a pull request with a clear description of your changes.

Please ensure your code adheres to the project's coding standards and includes tests for any new functionality.

Contributors

💖 Thanks to all the contributors who made GoFrame possible 💖

License

This project is licensed under the MIT License. See the LICENSE file for details.