import numpy as np
# Define the coefficient matrix A and the constant vector b
A = np.array([[1, 1],   # Coefficients for heads equation
              [2, 4]])  # Coefficients for legs equation
b = np.array([35, 94]) # Constants (total heads and legs)
# Solve the system of equations
solution = np.linalg.solve(A, b)
print(f"Chickens: {int(solution[0])}")
print(f"Rabbits: {int(solution[1])}")
import numpy as np
# From a list
arr1 = np.array([1, 2, 3, 4, 5])
# Using NumPy functions
arr2 = np.arange(0, 10, 2) # [0, 2, 4, 6, 8]
arr3 = np.linspace(0, 1, 5) # [0, 0.25, 0.5, 0.75, 1]
arr4 = np.zeros((3, 3)) # 3x3 array of zeros
arr5 = np.ones((2, 4)) # 2x4 array of ones
arr6 = np.random.rand(3, 3) # 3x3 array of random values
Array operations:
- np.reshape(): Reshape an array
- np.concatenate(): Join arrays
- np.split(): Split an array

Mathematical operations:
- np.sum(), np.mean(), np.std(): Basic statistics
- np.min(), np.max(): Find minimum and maximum values
- np.argmin(), np.argmax(): Find indices of min/max values

Linear algebra:
- np.dot(): Matrix multiplication
- np.linalg.inv(): Matrix inverse
- np.linalg.eig(): Eigenvalues and eigenvectors

Array manipulation:
- np.transpose(): Transpose an array
- np.sort(): Sort an array
- np.unique(): Find unique elements

To read the documentation for any of these functions, use Python's built-in help() or IPython's ? operator:

import numpy as np
help(np.array)
np.array?
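As a quick sketch of a few of these functions in action (the arrays below are arbitrary examples, not data used later in this lesson):

import numpy as np
m = np.arange(6).reshape(2, 3)               # reshape a 1-D range into a 2x3 matrix
print(np.concatenate([m, m], axis=0).shape)  # join two copies along rows -> (4, 3)
print(np.sum(m), np.mean(m), np.std(m))      # basic statistics: 15 2.5 ~1.71
print(np.argmax(m))                          # flattened index of the largest element: 5
print(np.dot(m, np.transpose(m)))            # 2x3 times 3x2 -> 2x2 matrix product
print(np.sort(np.array([3, 1, 2])))          # [1 2 3]
print(np.unique([1, 2, 2, 3, 3, 3]))         # [1 2 3]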
Let's compare the speed of evaluating x**2 + 2*x + 1 element-wise over a large array, first with a plain Python list and then with NumPy:
import numpy as np
import time
# Create large arrays
size = 10000000
data = list(range(size))
np_data = np.array(data, dtype=np.int64)  # use 64-bit ints so squaring large values doesn't overflow
# Python list comprehension
start = time.time()
result_py = [x**2 + 2*x + 1 for x in data]
end = time.time()
print(f"Python time: {end - start:.6f} seconds")
# NumPy vectorized operation
start = time.time()
result_np = np_data**2 + 2*np_data + 1
end = time.time()
print(f"NumPy time: {end - start:.6f} seconds")
# NumPy is significantly faster due to its optimized C implementation.
We'll use NumPy to analyze earthquake data:
import numpy as np
# Load the numeric earthquake columns; the first column is a UTC datetime string, so it is skipped
earthquakes = np.loadtxt("data/earthquakes.csv", delimiter=",", skiprows=1, usecols=(1, 2, 3, 4), dtype=float)
# Calculate average magnitude and depth
avg_depth = np.mean(earthquakes[:, 2])
avg_magnitude = np.mean(earthquakes[:, 3])
# Find the strongest earthquake
strongest_idx = np.argmax(earthquakes[:, 3])
strongest_magnitude = earthquakes[strongest_idx, 3]
strongest_depth = earthquakes[strongest_idx, 2]
print(f"Average magnitude: M{avg_magnitude:.2f}")
print(f"Average depth: {avg_depth:.2f} km")
print(f"Strongest earthquake: Magnitude {strongest_magnitude:.2f} at depth {strongest_depth:.2f} km")
import numpy as np
import pandas as pd
# Create a Series (np.nan marks a missing value)
s = pd.Series([1, 3, 5, np.nan, 6, 8])
# Create a DataFrame with columns of different types
df = pd.DataFrame({
    'A': [1, 2, 3, 4],
    'B': pd.date_range('20230101', periods=4),
    'C': pd.Series(1, index=range(4), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo'
})
Data loading and saving:
- pd.read_csv(), pd.read_excel(), pd.read_sql()
- df.to_csv(), df.to_excel(), df.to_sql()

Data inspection:
- df.head(), df.tail(): View first/last rows
- df.info(): Summary of DataFrame
- df.describe(): Statistical summary

Data selection:
- df['column']: Select a column
- df.loc[]: Label-based indexing
- df.iloc[]: Integer-based indexing

Data manipulation:
- df.groupby(): Group data
- df.merge(): Merge DataFrames
- df.pivot(): Reshape data

Data cleaning:
- df.dropna(): Drop missing values
- df.fillna(): Fill missing values
- df.drop_duplicates(): Remove duplicate rows

Time series functionality:
- pd.date_range(): Create date ranges
- df.resample(): Resample time series data

To read the documentation for any of these, use Python's built-in help() or IPython's ? operator:

import pandas as pd
help(pd.DataFrame)
pd.DataFrame?
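A brief sketch of a few of these methods on a small, made-up DataFrame (the column names and values below are illustrative only, not part of the lesson's datasets):

import numpy as np
import pandas as pd
toy = pd.DataFrame({
    "city": ["A", "A", "B", "B"],
    "value": [1.0, 2.0, np.nan, 4.0],
})
print(toy.head(2))                          # first two rows
print(toy.describe())                       # statistical summary of numeric columns
print(toy.dropna())                         # drop the row with a missing value
print(toy.fillna(0.0))                      # ...or fill missing values instead
print(toy.groupby("city")["value"].mean())  # mean value per city: A -> 1.5, B -> 4.0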
We'll use Pandas to analyze earthquake data this time:
import pandas as pd
# Load earthquake data
df = pd.read_csv("data/earthquakes.csv")
# Calculate average magnitude and depth
avg_depth = df['depth'].mean()
avg_magnitude = df['magnitude'].mean()
# Find the strongest earthquake
strongest_idx = df['magnitude'].idxmax()
strongest_magnitude = df.loc[strongest_idx, 'magnitude']
strongest_depth = df.loc[strongest_idx, 'depth']
print(f"Average magnitude: M{avg_magnitude:.2f}")
print(f"Average depth: {avg_depth:.2f} km")
print(f"Strongest earthquake: Magnitude {strongest_magnitude:.2f} at depth {strongest_depth:.2f} km")
We'll use Pandas to analyze temperature data:
import pandas as pd
import matplotlib.pyplot as plt
# Load temperature data
df = pd.read_csv("data/global_temperature.csv")
# Convert date column to datetime
df["date"] = pd.to_datetime(df["date"])
# Set date as index
df.set_index("date", inplace=True)
# Find the hottest and coldest days
hottest_day = df["temperature"].idxmax()
coldest_day = df["temperature"].idxmin()
print(f"Hottest day: {hottest_day.date()} ({df.loc[hottest_day, 'temperature']:.1f}°C)")
print(f"Coldest day: {coldest_day.date()} ({df.loc[coldest_day, 'temperature']:.1f}°C)")
# Calculate yearly average temperatures
yearly_avg = df.resample("Y").mean()  # note: newer pandas versions prefer "YE" for year-end frequency
# Plot yearly average temperatures
yearly_avg["temperature"].plot(figsize=(12, 6))
plt.title("Yearly Average Temperatures")
plt.ylabel("Temperature (°C)")
plt.show()