pandas的简单使用记录
创建
由数据库查询创建
自定义数据创建
1 2 3 4 5 6 7 8 9 10 11 12 lst=[[1 ,2 ,'apple' ],[3 ,4 ,'banan' ]] frm=pd.DataFrame(lst, columns=['tag1' ,'tag2' , 'tag3' ]) frm2=frm.loc[:,['tag1' , 'tag3' ]] query = Model.query.session.query(Model.local_date, Model.data_source, func.sum (Model.total).label('t' ), func.sum (Model.count).label('c' )) with AlchemyDbUtil(db, 'statistics' ) as dbutil: pdf=pd.read_sql(query.statement, dbutil.conn)
查看 * head, tail, describle()
* 排序
1 frame.sort_index(by=['column1' ],ascending=False )
* 过滤
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 frm3=frm2.dropna(subset=['tag2' ]) frm2.dropna(axis=0 , how="any" ) frm3=frm3[frm3['tag2' ]!='' ] frm3=frm3[frm3['tag2' ]!=frm3['tag1' ]] frm_main.nlargest(10 , 'cnt' ) frm_main.groupby(by=['airline_code' , 'iata_code' ]).apply(lambda x: x.nlargest(1 ,"cnt" ))
* 遍历
|tag1| tag2| cnt|
|---|---|---|
|6H |C3|6|
|8Q |SV|4|
1 2 for idx , item in frm4.iterrows(): print idx, '--' , item[0 ], item[1 ], item[2 ]
* 导出 to_csv, clipboard ...

* 统计
1 2 3 4 5 frm4=frm3.groupby(['tag1' , 'tag2' ]).size().reset_index(name='cnt' ).query('cnt >2' )
修改 * 动态生成\修改列
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 frm['tag_short' ]=frm['tag3' ].str [:2 ] reg_dict={ ... } for idx in frm.index: frm.loc[idx]['tag_new' ]=reg_dict.get(frm.loc[idx]['key' ], '' ) def apply_func (df ): return reg_dict.get(df['key' ], '' ) frm['tag_new' ]=frm.apply(lambda r:apply_apairline(r), axis=1 )
* 清除空数据
1 2 df4 = df4.dropna(subset=["age" , "sex" ])