python使用pandas读xlsx文件的实现

2024-04-02 19:55

短信预约 -IT技能 免费直播动态提醒

使用pandas读xlsx文件

读取前n行数据
读取指定数据（指定行指定列）
获取文件行号和列标题
将数据转换为字典形式

import pandas as pd
# 1. Load the workbook and look at its rows.
# With no sheet argument, read_excel loads the first sheet of the workbook.
df1 = pd.read_excel('d1.xlsx')

# The first 10 rows, still as a DataFrame.
data1 = df1.head(10)

# Every cell value as a NumPy array, one inner array per row.
# NOTE: `values` is an attribute, not a method; calling `df1.values()` raises
# "TypeError: 'numpy.ndarray' object is not callable".
data2 = df1.values

# Both results are printed under the same heading.
for shown in (data1, data2):
    print("获取到所有的值：\n{0}".format(shown))
 
# 2. Select specific rows / columns by position with iloc (positions are 0-based).
data3 = df1.iloc[0].values        # all values of the first row
data4 = df1.iloc[1, 1]            # single cell at row position 1, column position 1
data5 = df1.iloc[[1, 2]].values   # row positions 1 and 2, i.e. the second and third rows
data6 = df1.iloc[:, [0]].values   # every row of the first column

# Print each selection under the same heading, in the order they were taken.
for selected in (data3, data4, data5, data6):
    print("数据：\n{0}".format(selected))
 
# 3. Row labels and column titles of the loaded sheet.
row_labels = df1.index.values       # index label of every row (0, 1, 2, ... for this sheet)
column_titles = df1.columns.values  # the header of each column

print("输出行号列表{}".format(row_labels))
print("输出列标题{}".format(column_titles))
 
# 4. Convert the sheet into a list of dictionaries, one {column: value} dict per row.
# orient="records" builds all row dicts in one call, replacing a Python-level loop
# of per-row label lookups. It also keeps producing exactly one dict per row when
# index labels repeat, where a label lookup would return several rows at once.
data = df1[['id', 'name', 'class', 'data', 'score']].to_dict(orient='records')
print("最终获取到的数据是：{0}".format(data))

# Note on iloc vs. loc: iloc selects rows/columns by integer position,
# whereas loc selects them by index label.

示例数据（d1.xlsx 的内容）：

id	name	class	data	score
201901	A	1	Jan-20	1.3
201902	B	2	Mar-20	3.4
201903	C	3	May-20	3.4
201904	D	1	Jan-20	3.4
201905	E	1	Feb-20	5.6
201906	F	1	Mar-20	4.6
201907	G	1	Feb-19	7.8
201908	H	2	Apr-30	5.6
201909	I	3	Jan-42	5.6
201910	G	4	Mar-30	4.5
201911	K	5	Apr-20	3.4
201912	L	6	Apr-20	2.3
201913	M	4	Mar-20	2.4