Add source example files

2025-07-12 18:43:56 +02:00
parent 11d6846f49
commit d0bcfcf8f1
62 changed files with 40101 additions and 161 deletions
--- a/source/pandas/01-pandas-manual/01-series-demo.py
+++ b/source/pandas/01-pandas-manual/01-series-demo.py
@@ -0,0 +1,11 @@
+"""Define simple Series objects by hand."""
+import pandas as pd
+
+if __name__ == '__main__':
+    # Create a Series whose values all share the same type
+    s1 = pd.Series([1, 3, 7, 9, 13, 15, 19, 21])
+
+    # Get the length of the series
+    print(f"Size of the series: {len(s1)}")
+    # Display the contents of s1
+    print(s1)
--- a/source/pandas/01-pandas-manual/02-dataframe-demo.py
+++ b/source/pandas/01-pandas-manual/02-dataframe-demo.py
@@ -0,0 +1,35 @@
+"""Define simple DataFrame objects by hand."""
+import pandas as pd
+
+if __name__ == '__main__':
+    # Create a DataFrame from a dict mapping column names to lists of values
+    df1 = pd.DataFrame({"age": [25, 45, 65], "prenom": ["Pierre", "Paul", "Jacques"]})
+
+    # Get the column names of the dataframe
+    print("Columns:", df1.columns.tolist())
+    # Get the number of rows in the dataframe
+    print(f"Size of the dataframe in rows: {len(df1)}")
+    # Show the type of the columns
+    print("Data type of columns (autodetected):")
+    type_dict = df1.dtypes.to_dict()
+    for column, dtype in type_dict.items():
+        print(f"{column:<20} : {str(dtype):<20}")
+    print("_" * 80)
+    # Display the contents of the dataframe
+    print(df1)
+
+    # Create a DataFrame from a list of rows; no column names are given here
+    df2 = pd.DataFrame([[25, "Pierre"], [45, "Paul"], [65, "Jacques"]])
+
+    # Get the column names of the dataframe
+    print("Columns:", df2.columns.tolist())
+    # Get the number of rows in the dataframe
+    print(f"Size of the dataframe in rows: {len(df2)}")
+    # Show the type of the columns
+    print("Data type of columns (autodetected):")
+    type_dict = df2.dtypes.to_dict()
+    for column, dtype in type_dict.items():
+        print(f"{column:<20} : {str(dtype):<20}")
+    print("_" * 80)
+    # Display the contents of the dataframe
+    print(df2)
--- a/source/pandas/01-pandas-manual/03-dataframe-index-demo.py
+++ b/source/pandas/01-pandas-manual/03-dataframe-index-demo.py
@@ -0,0 +1,27 @@
+"""
+Create a DataFrame and use its index to select rows by label or by position.
+"""
+import pandas as pd
+
+# Create a dataframe and associate an index to it
+df = pd.DataFrame(
+    data={"name": ["Mac", "Ann", "Rob"], "age": [33, 44, 55]},
+    index=["u1", "u2", "u3"]  # as many values as rows
+)
+
+# Show normal DataFrame
+print(df)
+
+# Access one row using an index value
+print(df.loc["u1"])
+
+# Access the same row using a numerical index
+print(df.iloc[0])
+
+# Get a DataFrame with a selection of rows, in the requested order.
+# The row labels **must** be given as a list and not a tuple: with .loc,
+# a tuple is interpreted as one key per axis (row key, column key).
+print(df.loc[["u1", "u3", "u2"]])
+
+# Show the index
+print(df.index)
--- a/source/pandas/01-pandas-manual/04-dataframe-load-csv-url.py
+++ b/source/pandas/01-pandas-manual/04-dataframe-load-csv-url.py
@@ -0,0 +1,21 @@
+"""
+Read an online CSV file into a DataFrame.
+
+Since the referenced file contains a datetime column, and by default
+read_csv leaves date strings as plain text, you have to replace
+such columns with their conversion to a correct dtype.
+
+Or better, you can directly tell the read_csv function to parse them
+as dates with its parse_dates argument.
+"""
+import pandas as pd
+
+
+url = "https://media.githubusercontent.com/media/datablist/sample-csv-files/main/files/customers/customers-100.csv"
+df = pd.read_csv(url, parse_dates=["Subscription Date"])
+print(df.to_string(max_cols=7))
+print(df.dtypes)
+
+# Replace a column with its converted version (redundant here: parse_dates above already requested it)
+df["Subscription Date"] = pd.to_datetime(df["Subscription Date"])
+print(df, df.dtypes)
--- a/source/pandas/01-pandas-manual/05-series-index.py
+++ b/source/pandas/01-pandas-manual/05-series-index.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+if __name__ == '__main__':
+    df = pd.Series([1, 3, 2, 4, 2, 2, 2, 1, 1, 3])
+    # Replace the default index with explicit labels; an index like this
+    # could be used to build a time series.
+    df.index = pd.Series([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
+    print(df)
+
+
+s = pd.Series([2, 5, 2, 6], index=pd.date_range("2023-01-01", "2023-01-10", 4))
+print(s)
+dr = pd.date_range("2023-01-01", "2023-01-10", 4)
+print(dr)