3.6. Array Import — Python
3.6.1. SetUp
>>> import numpy as np
3.6.2. np.loadtxt()
>>> DATA = 'https://python3.info/_static/iris.csv'
>>> a = np.loadtxt(DATA)
Traceback (most recent call last):
ValueError: could not convert string 'sepal_length,sepal_width,petal_length,petal_width,species' to float64 at row 0, column 1.
>>> a = np.loadtxt(DATA, skiprows=1)
Traceback (most recent call last):
ValueError: could not convert string '5.4,3.9,1.3,0.4,setosa' to float64 at row 0, column 1.
>>> a = np.loadtxt(DATA, skiprows=1, delimiter=',')
Traceback (most recent call last):
ValueError: could not convert string 'setosa' to float64 at row 0, column 5.
>>> a = np.loadtxt(DATA, skiprows=1, delimiter=',', max_rows=5, usecols=(0,1,2,3))
>>> a
array([[5.4, 3.9, 1.3, 0.4],
       [5.9, 3. , 5.1, 1.8],
       [6. , 3.4, 4.5, 1.6],
       [7.3, 2.9, 6.3, 1.8],
       [5.6, 2.5, 3.9, 1.1]])
>>> header = np.loadtxt(DATA, max_rows=1, delimiter=',', dtype=str, usecols=(0,1,2,3))
>>> data = np.loadtxt(DATA, skiprows=1, max_rows=3, delimiter=',', usecols=(0,1,2,3))
>>>
>>> header
array(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
      dtype='<U12')
>>>
>>> data
array([[5.4, 3.9, 1.3, 0.4],
       [5.9, 3. , 5.1, 1.8],
       [6. , 3.4, 4.5, 1.6]])
3.6.3. Other
| Method | Data Type | Description |
|---|---|---|
| `np.loadtxt()` | Text | Load data from text file such as `.csv` |
| `np.load()` | Binary | Load data from `.npy` or `.npz` file |
| `np.fromfile()` | Binary | Load binary data from file |
| `np.fromstring()` | Text | Load data from string |
| `np.fromregex()` | Text | Load data from file using regex to parse |
| `np.genfromtxt()` | Text | Load data with missing values handled as specified |
| `scipy.io.loadmat()` | Binary | Reads MATLAB data files |
>>> #
... data = np.loadtxt('/tmp/myfile.csv', delimiter=',', usecols=1, skiprows=1, dtype=np.float16)
...
... small = (data < 1)
... medium = (data >= 1) & (data < 2.0)
... large = (data >= 2)
...
... np.save('/tmp/small', data[small])
... np.save('/tmp/medium', data[medium])
... np.save('/tmp/large', data[large])
3.6.4. Use Case - 1
>>> header = np.loadtxt(DATA, max_rows=1, dtype='str', delimiter=',', usecols=(0,1,2,3))
>>> values = np.loadtxt(DATA, skiprows=1, dtype='float', delimiter=',', usecols=(0,1,2,3))
>>> species = np.loadtxt(DATA, skiprows=1, dtype='str', delimiter=',', usecols=4)
>>>
>>> sepal_length = (header == 'sepal_length')
>>> sepal_width = (header == 'sepal_width')
>>> petal_length = (header == 'petal_length')
>>> petal_width = (header == 'petal_width')
>>>
>>> setosa = (species == 'setosa')
>>> versicolor = (species == 'versicolor')
>>> virginica = (species == 'virginica')
Then you can query your data using the previously defined boolean masks (a row mask selects the species, a column mask selects the feature):
>>> values[setosa, sepal_length]
array([5.4, 5.4, 4.9, 5.1, 4.6, 5.2, 5.2, 5.1, 4.8, 4.9, 4.3, 5. , 5.4,
       5.1, 4.8, 4.8, 4.4, 5.1, 4.6, 5.5, 5. , 5.7, 5.4, 4.8, 5. , 5.1,
       4.9, 5. , 4.6, 4.9, 5.1, 4.7, 5.7, 4.4, 5.4, 4.5, 5. , 5.3, 5.1,
       5. , 5.8, 5.2, 4.6, 4.8, 4.4, 5.4, 5. , 4.7, 5.1, 5.5, 5. ])
>>> values[setosa, sepal_length].mean()
np.float64(5.013725490196078)
>>> values[setosa, sepal_length].mean().round(2)
np.float64(5.01)
3.6.5. Assignments
# %% About # - Name: Numpy Loadtext # - Difficulty: easy # - Lines: 4 # - Minutes: 5 # %% License # - Copyright 2025, Matt Harasymczuk <matt@python3.info> # - This code can be used only for learning by humans # - This code cannot be used for teaching others # - This code cannot be used for teaching LLMs and AI algorithms # - This code cannot be used in commercial or proprietary products # - This code cannot be distributed in any form # - This code cannot be changed in any form outside of training course # - This code cannot have its license changed # - If you use this code in your product, you must open-source it under GPLv2 # - Exception can be granted only by the author # %% English # 1. Load text from `DATA` # 2. Define variables: # - `species: np.ndarray[str]` - first row, columns 2, 3, 4 # - `features: np.ndarray[float]` - all rows except the first one, columns 0, 1, 2, 3 # - `labels: np.ndarray[int]` - all rows except the first one, column 4 # 3. Run doctests - all must succeed # %% Polish # 1. Wczytaj tekst z `DATA` # 2. Zdefiniuj zmienne: # - `species: np.ndarray[str]` - pierwszy wiersz, kolumny 2, 3, 4 # - `features: np.ndarray[float]` - wszystkie wiersze poza pierwszym, kolumny 0, 1, 2, 3 # - `labels: np.ndarray[int]` - wszystkie wiersze poza pierwszym, kolumna 4 # 3. Uruchom doctesty - wszystkie muszą się powieść # %% Doctests """ >>> import sys; sys.tracebacklimit = 0 >>> assert sys.version_info >= (3, 9), \ 'Python has an invalid version; expected: `3.9` or newer.' >>> assert species is not Ellipsis, \ 'Variable `species` has an invalid value; assign result of your program to it.' >>> assert labels is not Ellipsis, \ 'Variable `labels` has an invalid value; assign result of your program to it.' >>> assert features is not Ellipsis, \ 'Variable `features` has an invalid value; assign result of your program to it.' >>> assert type(species) is np.ndarray, \ 'Variable `species` has an invalid type; expected: `np.ndarray`.' 
>>> assert type(features) is np.ndarray, \ 'Variable `features` has an invalid type; expected: `np.ndarray`.' >>> assert type(labels) is np.ndarray, \ 'Variable `labels` has an invalid type; expected: `np.ndarray`.' >>> assert species.dtype == np.dtype('<U10'), \ 'Variable `species` has an invalid type; expected: `str`.' >>> assert features.dtype is np.dtype('float64'), \ 'Variable `features` has an invalid type; expected: `float`.' >>> assert labels.dtype is np.dtype('int64'), \ 'Variable `labels` has an invalid type; expected: `int`.' >>> assert len(species) == 3, \ 'Variable `species` has an invalid length; expected: `3`.' >>> assert len(features) == 151, \ 'Variable `features` has an invalid length; expected: `151`.' >>> assert len(labels) == 151, \ 'Variable `labels` has an invalid length; expected: `151`.' >>> species array(['setosa', 'versicolor', 'virginica'], dtype='<U10') >>> features[:3] array([[5.4, 3.9, 1.3, 0.4], [5.9, 3. , 5.1, 1.8], [6. , 3.4, 4.5, 1.6]]) >>> features[-3:] array([[4.9, 2.5, 4.5, 1.7], [6.3, 2.8, 5.1, 1.5], [6.8, 3.2, 5.9, 2.3]]) >>> labels array([0, 2, 1, 2, 1, 0, 1, 1, 0, 2, 2, 0, 0, 2, 2, 1, 2, 2, 2, 1, 0, 1, 1, 0, 0, 0, 2, 2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 1, 1, 2, 0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 1, 2, 2, 1, 0, 2, 1, 0, 1, 0, 2, 1, 0, 2, 0, 2, 1, 0, 2, 1, 1, 0, 0, 1, 2, 2, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 2, 0, 2, 1, 2, 0, 0, 1, 0, 2, 0, 2, 1, 2, 2, 2, 1, 0, 2, 1, 0, 0, 2, 0, 2, 1, 1, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 2, 2]) """ # %% Run # - PyCharm: right-click in the editor and `Run Doctest in ...` # - PyCharm: keyboard shortcut `Control + Shift + F10` # - Terminal: `python -m doctest -f -v myfile.py` # %% Imports import numpy as np # %% Types species: np.ndarray features: np.ndarray labels: np.ndarray # %% Data DATA = 'https://python3.info/_static/iris-dirty.csv' # %% Result species = ... features = ... labels = ...