Add source example files

This commit is contained in:
2025-07-12 18:43:56 +02:00
parent 11d6846f49
commit d0bcfcf8f1
62 changed files with 40101 additions and 161 deletions

View File

@ -0,0 +1,11 @@
"""Define simple Series objects by hand."""
import pandas as pd
if __name__ == "__main__":
    # Build a Series from a plain list of integers, so that every value
    # shares one type.
    raw_values = [1, 3, 7, 9, 13, 15, 19, 21]
    series = pd.Series(raw_values)
    # Report how many elements the series holds.
    element_count = series.size
    print(f"Size of the series: {element_count}")
    # Print the whole series.
    print(series)

View File

@ -0,0 +1,35 @@
"""Define simple DataFrame objects by hand."""
import pandas as pd
def _describe_dataframe(frame: pd.DataFrame) -> None:
    """Print a short report about *frame*.

    The report contains, in order: the column names, the number of rows,
    the dtype of every column, a separator line and finally the contents
    of the DataFrame itself.
    """
    # Get the column names of the dataframe
    print("Columns:", frame.columns.tolist())
    # Get the number of rows in the dataframe
    print(f"Size of the dataframe in rows: {len(frame)}")
    # Show the type of the columns
    print("Data type of columns (autodetected):")
    for column, dtype in frame.dtypes.items():
        print(f"{column:<20} : {str(dtype):<20}")
    print("_" * 80)
    # Display the contents of the dataframe
    print(frame)


def main() -> None:
    """Build the same data as two DataFrames and describe each of them."""
    # Creating a dataframe from a dict: keys become the column names and
    # each list holds the values of one column.
    df1 = pd.DataFrame({"age": [25, 45, 65], "prenom": ["Pierre", "Paul", "Jacques"]})
    _describe_dataframe(df1)
    # Creating a dataframe from a list of rows: no column names are given,
    # so the columns are numbered automatically.
    df2 = pd.DataFrame([[25, "Pierre"], [45, "Paul"], [65, "Jacques"]])
    _describe_dataframe(df2)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,27 @@
"""
Create a DataFrame and use indexes to select rows by label or by position.
"""
import pandas as pd
# Build the DataFrame together with its index: one label per row.
row_labels = ["u1", "u2", "u3"]
column_data = {"name": ["Mac", "Ann", "Rob"], "age": [33, 44, 55]}
df = pd.DataFrame(column_data, index=row_labels)
# Display the whole DataFrame
print(df)
# Fetch a single row through its index label
row_by_label = df.loc["u1"]
print(row_by_label)
# Fetch that same row through its numerical position
row_by_position = df.iloc[0]
print(row_by_position)
# Extract several rows at once, in the requested order.
# The labels **must** be passed as a list, not as a tuple: a tuple is
# used to select or slice along the other axis.
wanted_labels = ["u1", "u3", "u2"]
print(df.loc[wanted_labels])
# Display the index itself
print(df.index)

View File

@ -0,0 +1,21 @@
"""
Read an online CSV file into a DataFrame.
Since the referenced file contains a datetime column, and by default
read_csv does not interpret data from the text file, you have to replace
some columns with their conversion as a correct dtype.
Or better, you can directly tell the read_csv function to interpret
those columns as dates through its parse_dates argument.
"""
import pandas as pd
url = "https://media.githubusercontent.com/media/datablist/sample-csv-files/main/files/customers/customers-100.csv"
date_column = "Subscription Date"
# Load the remote CSV, asking read_csv to parse the date column on the fly.
df = pd.read_csv(url, parse_dates=[date_column])
# Show the table rendered with at most 7 columns, then the dtype of each column.
table_text = df.to_string(max_cols=7)
print(table_text)
print(df.dtypes)
# Manual alternative: replace a column with its converted version.
# NOTE(review): parse_dates above has presumably already converted this
# column, so this step only illustrates the explicit conversion.
converted = pd.to_datetime(df[date_column])
df[date_column] = converted
print(df, df.dtypes)

View File

@ -0,0 +1,14 @@
import pandas as pd
def main() -> None:
    """Show two ways of attaching an index to a Series.

    First an integer index is assigned to an existing Series, then a
    second Series is created directly with a date index. Both series and
    the date index itself are printed.
    """
    values = pd.Series([1, 3, 2, 4, 2, 2, 2, 1, 1, 3])
    # Add an index to the series, that could be used to make a
    # temporal series.
    values.index = pd.Series([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    print(values)
    # Build the date index once and reuse it; with start, end and periods
    # given, date_range returns `periods` evenly spaced timestamps.
    dates = pd.date_range(start="2023-01-01", end="2023-01-10", periods=4)
    s = pd.Series([2, 5, 2, 6], index=dates)
    print(s)
    print(dates)


if __name__ == "__main__":
    main()