Initial commit
This commit is contained in:
34
workshop/pandas-shoes/dask_shoes.py
Normal file
34
workshop/pandas-shoes/dask_shoes.py
Normal file
@ -0,0 +1,34 @@
|
||||
from contexttimer import Timer
|
||||
from dask import dataframe as dd
|
||||
from dask.distributed import Client
|
||||
import pandas as pd
|
||||
|
||||
def sort_alternating(frame, loops, column="prices.amountMin", materialize=None):
    """Sort *frame* by *column* ``loops`` times, flipping the direction each pass.

    Pass ``i`` sorts with ``ascending=bool(i % 2)``, so the first pass is
    descending, the second ascending, and so on.

    Args:
        frame: Any object exposing ``sort_values(column, ascending=...)``
            (a pandas or Dask DataFrame in this script).
        loops: Number of sort passes to run.
        column: Name of the column to sort by.
        materialize: Optional callable applied to each sorted result, e.g.
            ``lambda lazy: lazy.compute()`` to force a lazy Dask result.
            When ``None`` the sorted result is used as-is.

    Returns:
        The result of the final pass, or ``None`` when ``loops`` is zero or
        negative (no pass ran, so there is nothing to report).
    """
    last = None
    for i in range(loops):
        ordered = frame.sort_values(column, ascending=bool(i % 2))
        last = ordered if materialize is None else materialize(ordered)
    return last


if __name__ == '__main__':
    loops: int = 10
    # Worker count is kept at its original value but no longer piggybacks on
    # the iteration count, so either can be tuned independently.
    workers: int = 10
    csv_path = "data/womens-shoes.csv.xz"
    sort_column = "prices.amountMin"

    # The context manager closes the client (and the local cluster it
    # started) on exit, instead of leaving worker processes behind.
    with Client(n_workers=workers):
        df: dd.DataFrame = dd.read_csv(
            csv_path,
            low_memory=False,
            dtype={
                "asins": "object",
                "prices.offer": "object",
                "upc": "object",
                "weight": "object",
            },
        )
        pf = pd.read_csv(csv_path)
        af = pd.read_csv(csv_path, engine="pyarrow")

        # Timings quoted below are the original author's notes, not re-measured.
        benchmarks = (
            # Dask Dataframe (±4800ms) -- the lazy result must be computed
            # inside the timed region for the sort to actually run.
            (df, lambda lazy: lazy.compute()),
            # Pandas Dataframe (±35ms)
            (pf, None),
            # Pandas PyArrow Dataframe (±29ms)
            (af, None),
        )

        for frame, materialize in benchmarks:
            with Timer() as timer:
                df2 = sort_alternating(frame, loops, sort_column, materialize)
            # Reported after the timed region so printing is not measured.
            if df2 is not None:
                print(df2[sort_column].iloc[0])
            print(timer.elapsed)
|
||||
|
||||
|
Reference in New Issue
Block a user