goframe is a Go package inspired by Python's pandas, designed for data manipulation and analysis. It provides a DataFrame structure and Column types for handling and processing structured data efficiently.
Features
- Typed columns with support for `int`, `float64`, `string`, and `bool`.
- DataFrame operations such as adding/removing columns, filtering rows, and selecting subsets.
- Auto-detection of column types during CSV import.
- Statistical aggregations like `Mean`, `Sum`, `Min`, and `Max`.
- Join operations: Perform `inner`, `left`, `right`, and `outer` joins between DataFrames.
- Row operations: Access rows (`Row`), retrieve subsets (`Head`, `Tail`), append rows (`AppendRow`), and remove rows (`DropRow`).
- Multiple Column Selection: Select multiple columns using the `MultiSelect` method.
- Column renaming: Rename columns using the `RenameColumn` method.
- CSV export: Save DataFrames to CSV files using `ToCSV` and `ToCSVWriter`.
- Time Series Support: Add datetime indexing, resampling, and shifting for time series data.
- Visualization: Generate line and bar plots directly from DataFrames.
Installation
To install goframe, use:
go get github.com/kishyassin/goframe
Usage
Creating a DataFrame
package main import ( "fmt" "github.com/kishyassin/goframe" ) func main() { // Create a new DataFrame df := goframe.NewDataFrame() // Add columns to the DataFrame df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("age", []int{25, 30, 35}))) // Print the DataFrame fmt.Println(df) }
Importing from CSV
package main import ( "fmt" "log" "strings" "github.com/kishyassin/goframe" ) func main() { reader := strings.NewReader(`name,age,salary Alice,25,50000 Bob,30,60000 Charlie,35,70000`) df, err := goframe.FromCSVReader(reader) if err != nil { log.Fatal(err) } fmt.Println(df) }
Joining DataFrames
package main import ( "fmt" "log" "github.com/kishyassin/goframe" ) func main() { df1 := goframe.NewDataFrame() df1.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("id", []int{1, 2, 3}))) df1.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("value1", []string{"A", "B", "C"}))) df2 := goframe.NewDataFrame() df2.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("id", []int{2, 3, 4}))) df2.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("value2", []string{"X", "Y", "Z"}))) // Perform an inner join joined, err := df1.InnerJoin(df2, "id") if err != nil { log.Fatal(err) } fmt.Println(joined) }
Grouping by multiple columns (Groupby list of strings)
package main import ( "fmt" "log" "github.com/kishyassin/goframe" ) func main() { df := goframe.NewDataFrame() df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("city", []string{"A", "A", "B"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("category", []string{"x", "y", "x"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("value", []int{10, 20, 30}))) // Group by multiple columns using a list of strings gdf := df.Groupby([]string{"city", "category"}) if gdf.Error() != nil { log.Fatal(gdf.Error()) } // Aggregate (sum) the "value" column for each group summed, err := gdf.Sum("value") if err != nil { log.Fatal(err) } fmt.Println("Grouped and summed:") fmt.Println(summed) }
Adding two DataFrames (DataFrame.Add)
package main import ( "fmt" "log" "github.com/kishyassin/goframe" ) func main() { df1 := goframe.NewDataFrame() df1.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("a", []int{1, 2, 3}))) df1.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("b", []int{10, 20, 30}))) df2 := goframe.NewDataFrame() df2.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("a", []int{4, 5, 6}))) df2.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("b", []int{40, 50, 60}))) // Add the two dataframes element-wise (numeric columns are summed) summed, err := df1.Add(df2) if err != nil { log.Fatal(err) } fmt.Println("Summed DataFrame:") fmt.Println(summed) }
Row Operations
package main import ( "fmt" "github.com/kishyassin/goframe" ) func main() { df := goframe.NewDataFrame() df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("age", []int{25, 30, 35}))) // Access a row row, _ := df.Row(1) fmt.Println("Row 1:", row) // Get the first two rows head := df.Head(2) fmt.Println("Head:", head) // Append a new row df.AppendRow(df, map[string]any{"name": "Diana", "age": 40}) fmt.Println("After appending a row:", df) // Drop a row df.DropRow(1) fmt.Println("After dropping a row:", df) }
Multiple Column Selection
func main() { df := goframe.NewDataFrame() df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("salary", []int{100, 200, 300}))) selectedDf, err := df.MultiSelect("name", "salary") if err != nil { log.Fatal(err) } fmt.Println("The DataFrame with selected columns: ", selectedDf.String()) }
Renaming Columns
package main import ( "fmt" "log" "github.com/kishyassin/goframe" ) func main() { df := goframe.NewDataFrame() df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("age", []int{25, 30, 35}))) err := df.RenameColumn("name", "full_name") if err != nil { log.Fatal(err) } fmt.Println("Renamed column:", df) }
Exporting to CSV
package main import ( "fmt" "log" "github.com/kishyassin/goframe" ) func main() { df := goframe.NewDataFrame() df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("name", []string{"Alice", "Bob", "Charlie"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("age", []int{25, 30, 35}))) err := df.ToCSV("output.csv") if err != nil { log.Fatal(err) } fmt.Println("DataFrame exported to output.csv") }
Advanced Features
Time Series
package main import ( "fmt" "time" "github.com/kishyassin/goframe" ) func main() { df := goframe.NewDataFrame() df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("date", []string{"2025-09-14", "2025-09-15", "2025-09-16"}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("value", []float64{10.5, 20.3, 30.7}))) // Convert the "date" column from string to time.Time dateCol, _ := df.Select("date") var dateTimes []time.Time for _, dateStr := range dateCol.Data { parsedDate, err := time.Parse("2006-01-02", dateStr.(string)) if err != nil { fmt.Println("Error parsing date:", err) return } dateTimes = append(dateTimes, parsedDate) } df.Columns["date"] = goframe.ConvertToAnyColumn(goframe.NewColumn("date", dateTimes)) // Resample data to daily frequency resampled, err := df.Resample("date", "D", func(values []any) any { // Example aggregation function: calculate the sum sum := 0.0 for _, v := range values { if num, ok := v.(float64); ok { sum += num } } return sum }) if err != nil { fmt.Println("Error during resampling:", err) return } fmt.Println("Resampled DataFrame:", resampled) }
Visualization
package main import ( "fmt" "github.com/kishyassin/goframe" ) func main() { df := goframe.NewDataFrame() df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("x", []float64{1, 2, 3}))) df.AddColumn(goframe.ConvertToAnyColumn(goframe.NewColumn("y", []float64{2.5, 3.5, 4.5}))) // Generate a line plot err := df.LinePlot("x", "y", "line_plot.png") if err != nil { fmt.Println("Error generating line plot:", err) return } fmt.Println("Line plot saved as 'line_plot.png'") }
API Reference
DataFrame Methods
- `NewDataFrame()`: Create a new DataFrame.
- `AddColumn(column *Column[any])`: Add a column to the DataFrame.
- `Row(index int)`: Retrieve a row by index.
- `InnerJoin(other *DataFrame, key string)`: Perform inner join operation.
- `OuterJoin(other *DataFrame, key string)`: Perform outer join operation.
- `LeftJoin(other *DataFrame, key string)`: Perform left join operation.
- `RightJoin(other *DataFrame, key string)`: Perform right join operation.
- `Resample(column string, frequency string)`: Resample time series data.
- `LinePlot(xCol, yCol, outputFile string)`: Generate a line plot.
Column Methods
- `NewColumn(name string, data []T)`: Create a new column.
- `At(index int)`: Retrieve a value by index.
Contributing
We welcome contributions from the community! If you'd like to contribute:
- Fork the repository.
- Create a new branch for your feature or bug fix.
- After the implementation/fix, run `go test ./goframe_tests` and ensure all tests pass.
- Submit a pull request with a clear description of your changes.
Please ensure your code adheres to the project's coding standards and includes tests for any new functionality.
Contributors
💖 Thanks to all the contributors who made GoFrame possible 💖
License
This project is licensed under the MIT License. See the LICENSE file for details.