import numpy as np
# Chickens-and-rabbits puzzle written as the 2x2 linear system A @ x = b,
# where x = [number of chickens, number of rabbits].
A = np.array([[1, 1],   # Coefficients for heads equation
              [2, 4]])  # Coefficients for legs equation
b = np.array([35, 94])  # Constants (total heads and legs)
# Solve the system of equations
solution = np.linalg.solve(A, b)
# The solver returns floats. Round to the nearest integer before converting:
# a bare int() truncates, so a result such as 22.999999 would be shown as 22.
chickens, rabbits = (int(round(value)) for value in solution)
print(f"Chickens: {chickens}")
print(f"Rabbits: {rabbits}")
import numpy as np
# Build an array directly from an existing Python sequence
arr1 = np.array((1, 2, 3, 4, 5))
# Build arrays with NumPy's own constructor functions
arr2 = np.arange(0, 10, step=2)          # fixed step size -> [0, 2, 4, 6, 8]
arr3 = np.linspace(start=0, stop=1, num=5)  # fixed point count -> [0, 0.25, 0.5, 0.75, 1]
arr4 = np.zeros(shape=(3, 3))            # 3x3 array filled with zeros
arr5 = np.ones(shape=(2, 4))             # 2x4 array filled with ones
arr6 = np.random.rand(3, 3)              # 3x3 array of random values
Array operations:
- np.reshape(): Reshape an array
- np.concatenate(): Join arrays
- np.split(): Split an array
Mathematical operations:
- np.sum(), np.mean(), np.std(): Basic statistics
- np.min(), np.max(): Find minimum and maximum values
- np.argmin(), np.argmax(): Find indices of min/max values
Linear algebra:
- np.dot(): Matrix multiplication
- np.linalg.inv(): Matrix inverse
- np.linalg.eig(): Eigenvalues and eigenvectors
Array manipulation:
- np.transpose(): Transpose an array
- np.sort(): Sort an array
- np.unique(): Find unique elements
import numpy as np
# Show the built-in documentation for np.array via Python's help() function
help(np.array)
Using the ? operator (IPython/Jupyter):
np.array?
Let's compare the speed of evaluating x**2 + 2*x + 1 for every element of a large array:
import numpy as np
import time
# Create large arrays holding the same values: a plain Python list and a NumPy array
size = 10000000
data = list(range(size))
# Pin the dtype to 64-bit: where NumPy's default integer is 32-bit, x**2 would
# overflow for the larger values here (up to ~1e14).
np_data = np.array(data, dtype=np.int64)
# Python list comprehension
# perf_counter() is the clock intended for measuring short durations; time.time()
# is wall-clock time, which can be adjusted and may have coarse resolution.
start = time.perf_counter()
result_py = [x**2 + 2*x + 1 for x in data]
end = time.perf_counter()
print(f"Python time: {end - start:.6f} seconds")
# NumPy vectorized operation (same polynomial, applied to the whole array at once)
start = time.perf_counter()
result_np = np_data**2 + 2*np_data + 1
end = time.perf_counter()
print(f"NumPy time: {end - start:.6f} seconds")
# NumPy is significantly faster due to its optimized C implementation.
We'll use NumPy to analyze earthquake data:
import numpy as np
# Read the numeric part of the earthquake table.
# The first column of the file is the UTC datetime, so only columns 1-4 are
# parsed as floats; the header row is skipped.
earthquakes = np.loadtxt(
    "data/earthquakes.csv",
    delimiter=",",
    skiprows=1,
    usecols=(1, 2, 3, 4),
    dtype=float,
)
# Pull out the two columns of interest from the loaded array
depths = earthquakes[:, 2]
magnitudes = earthquakes[:, 3]
# Averages over all events
avg_depth = depths.mean()
avg_magnitude = magnitudes.mean()
# Row with the largest magnitude, and that row's magnitude and depth
strongest_idx = magnitudes.argmax()
strongest_magnitude = magnitudes[strongest_idx]
strongest_depth = depths[strongest_idx]
print(f"Average magnitude: M{avg_magnitude:.2f}")
print(f"Average depth: {avg_depth:.2f} km")
print(f"Strongest earthquake: Magnitude {strongest_magnitude:.2f} at depth {strongest_depth:.2f} km")
import pandas as pd
# A Series: one-dimensional labelled data; np.nan marks a missing entry
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
# A DataFrame: each column is built from a different kind of input
frame_columns = {
    'A': [1, 2, 3, 4],                                          # plain list
    'B': pd.date_range(start='20230101', periods=4),            # four consecutive dates
    'C': pd.Series(1, index=range(4), dtype='float32'),         # Series with explicit dtype
    'D': np.full(4, 3, dtype='int32'),                          # NumPy array of threes
    'E': pd.Categorical(["test", "train"] * 2),                 # categorical values
    'F': 'foo',                                                 # scalar, repeated for every row
}
df = pd.DataFrame(frame_columns)
Data loading and saving:
- pd.read_csv(), pd.read_excel(), pd.read_sql()
- df.to_csv(), df.to_excel(), df.to_sql()
Data inspection:
- df.head(), df.tail(): View first/last rows
- df.info(): Summary of DataFrame
- df.describe(): Statistical summary
Data selection:
- df['column']: Select a column
- df.loc[]: Label-based indexing
- df.iloc[]: Integer-based indexing
Data manipulation:
- df.groupby(): Group data
- df.merge(): Merge DataFrames
- df.pivot(): Reshape data
Data cleaning:
- df.dropna(): Drop missing values
- df.fillna(): Fill missing values
- df.drop_duplicates(): Remove duplicate rows
Time series functionality:
- pd.date_range(): Create date ranges
- df.resample(): Resample time series data
import pandas as pd
# Show the built-in documentation for pd.DataFrame via Python's help() function
help(pd.DataFrame)
Using the ? operator (IPython/Jupyter):
pd.DataFrame?
We'll use Pandas to analyze earthquake data this time:
import pandas as pd
# Read the earthquake table into a DataFrame
df = pd.read_csv("data/earthquakes.csv")
# Work with the two columns of interest as Series
depth_col = df['depth']
magnitude_col = df['magnitude']
# Averages over all events
avg_depth = depth_col.mean()
avg_magnitude = magnitude_col.mean()
# Index label of the largest magnitude, and that row's magnitude and depth
strongest_idx = magnitude_col.idxmax()
strongest_magnitude = magnitude_col.loc[strongest_idx]
strongest_depth = depth_col.loc[strongest_idx]
print(f"Average magnitude: M{avg_magnitude:.2f}")
print(f"Average depth: {avg_depth:.2f} km")
print(f"Strongest earthquake: Magnitude {strongest_magnitude:.2f} at depth {strongest_depth:.2f} km")
We'll use Pandas to analyze temperature data:
import pandas as pd
# Load temperature data
df = pd.read_csv("data/global_temperature.csv")
# Convert date column to datetime
df["date"] = pd.to_datetime(df["date"])
# Set date as index so the data can be resampled by calendar period
df.set_index("date", inplace=True)
# Find the hottest and coldest days
temperature = df["temperature"]
hottest_day = temperature.idxmax()
coldest_day = temperature.idxmin()
# Report the extremes with max()/min(): they are the readings at the dates found
# above, and they stay scalar even when a date occurs more than once in the index.
print(f"Hottest day: {hottest_day.date()} ({temperature.max():.1f}°C)")
print(f"Coldest day: {coldest_day.date()} ({temperature.min():.1f}°C)")
# Calculate yearly average temperatures ("Y" groups the rows by calendar year)
# NOTE(review): pandas 2.2+ deprecates the "Y" alias in favour of "YE" — switch
# once older pandas versions no longer need to be supported.
yearly_avg = df.resample("Y").mean()
# Plot yearly average temperatures
# pyplot supplies the title/label/show calls below; pandas' .plot() itself
# draws with matplotlib, so this is the same plotting dependency.
import matplotlib.pyplot as plt
yearly_avg["temperature"].plot(figsize=(12, 6))
plt.title("Yearly Average Temperatures")
plt.ylabel("Temperature (°C)")
plt.show()