# Pandas
import pandas as pd
# Example 1: read a single Parquet shard over HTTP, then report the five
# `horoscope` groups whose `text` values have the largest mean length
# (as measured by `str.len()`), sorted in descending order.
df = (
    pd.read_parquet("https://boincai.com/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0000.parquet")
    .groupby('horoscope')['text']
    .apply(lambda x: x.str.len().mean())
    .sort_values(ascending=False)
    .head(5)
)

# Example 2: the same aggregation over several Parquet shards, which are
# concatenated into one DataFrame before grouping.
# NOTE(review): `urls` was not defined anywhere in this file. It is seeded
# here with the one shard URL used in Example 1 - append the remaining shard
# URLs of the dataset before relying on the result.
urls = [
    "https://boincai.com/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0000.parquet",
]

df = (
    pd.concat([pd.read_parquet(url) for url in urls])
    .groupby('horoscope')['text']
    .apply(lambda x: x.str.len().mean())
    .sort_values(ascending=False)
    .head(5)
)

# Last updated