DuckDB
DuckDB
import duckdb
# Remote Parquet file (train split of blog_authorship_corpus) queried in place
# over HTTPS rather than downloaded first.
url = "https://boincai.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0000.parquet"

# No database path is given, so this is a transient in-memory connection.
con = duckdb.connect()

# The httpfs extension provides HTTP(S) file access; it must be installed
# and loaded before the URL can appear in a FROM clause.
con.execute("INSTALL httpfs;")
con.execute("LOAD httpfs;")

# For each horoscope value: number of rows and mean character length of
# `text`, keeping the five groups with the longest average.
# NOTE: in an interactive session the returned relation is displayed
# automatically; in a plain script, call `.show()` on it to print the table.
con.sql(f"SELECT horoscope, count(*), AVG(LENGTH(text)) AS avg_blog_length FROM '{url}' GROUP BY horoscope ORDER BY avg_blog_length DESC LIMIT(5)")
┌───────────┬──────────────┬────────────────────┐
│ horoscope │ count_star() │  avg_blog_length   │
│  varchar  │    int64     │       double       │
├───────────┼──────────────┼────────────────────┤
│ Aquarius  │        34062 │  1129.218836239798 │
│ Cancer    │        41509 │  1098.366812016671 │
│ Capricorn │        33961 │ 1073.2002002296751 │
│ Libra     │        40302 │ 1072.0718326633914 │
│ Leo       │        40587 │ 1064.0536871412028 │
└───────────┴──────────────┴────────────────────┘

Last updated