发布时间:2023-07-10 14:00
面对繁多的报表,如何从中快速的选取需要的字段,并按照一定规则加以计算,是工作中经常遇到的问题。本文记录了用pandas处理excel财务报表并加以计算的全过程。
从财务报表中(此项目中为excel文件)的资产负债表中筛选出资产总计、所有者权益的期初数与期末数,和利润表中营业总收入、已赚保费的期初数和当年累计,并计算同比和环比情况,写入新的excel中。
通常财务数据报表文件数目较多,且每个文件中包含多个sheet。此项目中的源数据文件按照年、月储存在根目录下。
观察文件名发现文件的前6位为报表的所选日期,观察数据发现保存在每个sheet中的第一列第一行。
导入所需的pandas、os库。用os.walk()的方法遍历根目录,以获取每个文件的绝对路径,pd.read_excel()读取数据。
利用pandas写入excel有多种方法,这里采用将数据保存为字典的方式写入,字典的键为字段名,值为字段的值,即
writen_data={\'filed_name1\':value,\'filed_name2’:value} 。首先我们先构建存储字段所需的字典,然后再按照字段名称寻找对应的数据。
值得注意的是财报12月报标识为13代表年报,在处理数据时应对应减1。
field = [\'date\', \'year\', \'quarter\', \'month\', \'corporation_name\', \'shareholder_equity_by\',
\'shareholder_equity_eb\', \'total_revenue_nm\', \'total_revenue_ay\']
written_data = dict(zip(field, [[] for _ in range(len(field))]))
读取文件中的利润表和资产负债表
import os
import pandas as pd
path = r\'C:\\ROC\\Files\\报表\' # root path
for root, dirs, files in os.walk(path):
for f in files:
file_path = os.path.join(root, f)
print(\'Processing {}\'.format(file_path))
df_bs = pd.read_excel(file_path, sheet_name=\'balnce_sheet\') # 资产负债表
df_is = pd.read_excel(file_path, sheet_name=\'income_statements\') # 利润表
读取年、月数据,并计算出对应季度数据,get_quarter在文末
if int(f[4:6]) == 13:
date = int(f[:6]) - 1 # 年报月份为13,应减1
else:
date = int(f[:6]) # 文件名前6个字符为日期
year = int(str(date)[:4])
month = int(str(date)[4:])
quarter = get_quarter(date)
written_data[\'date\'].append(date)
written_data[\'year\'].append(year)
written_data[\'month\'].append(month)
written_data[\'quarter\'].append(quarter)
按照在报表中的位置寻找对应字段
corporation_name = str(df_bs.iloc[1, 0])[3:] # 公司名
corporation_num = get_corporation_num(corporation_name)
orignal_data[\'corporation_name\'].append(corporation_num)
total_asset = df_bs.loc[df[\'资产负债表\'] == \'资产总计\', [\'Unnamed: 2\', \'Unnamed: 3\']]
total_asset_by = total_asset.values[0][0] # 资产总计年初数
total_asset_eb = total_asset.values[0][1] # 资产总计期末数
orignal_data[\'total_asset_by\'].append(total_asset_by)
orignal_data[\'total_asset_eb\'].append(total_asset_eb)
try:
shareholder_equity = df_bs.loc[df[\'Unnamed: 4\'] == \'所有者权益(或股东权益)合计\', [\'Unnamed: 6\', \'Unnamed: 7\']]
shareholder_equity_by = shareholder_equity.values[0][0] # 所有者权益(或股东权益)合计年初数
shareholder_equity_eb = shareholder_equity.values[0][1] # 所有者权益(或股东权益)合计期末数
except:
shareholder_equity = df.loc[df[\'Unnamed: 5\'] == \'所有者权益(或股东权益)合计\', [\'Unnamed: 7\', \'Unnamed: 8\']]
shareholder_equity_by = shareholder_equity.values[0][0]
shareholder_equity_eb = shareholder_equity.values[0][1]
orignal_data[\'shareholder_equity_by\'].append(shareholder_equity_by)
orignal_data[\'shareholder_equity_eb\'].append(shareholder_equity_eb)
total_revenue = df_is.loc[df_is[\'利润表\'] == \'一、营业总收入\', [\'Unnamed: 2\', \'Unnamed: 3\']]
total_revenue_nm = total_revenue.values[0][0] # 营业总收入本月数
total_revenue_ay = total_revenue.values[0][1] # 营业总收入本年累计
orignal_data[\'total_revenue_nm\'].append(total_revenue_nm)
orignal_data[\'total_revenue_ay\'].append(total_revenue_ay)
total_revenue = df_is.loc[df_is[\'利润表\'] == \'已赚保费\', [\'Unnamed: 2\', \'Unnamed: 3\']]
total_revenue_nm = total_revenue.values[0][0] # 营业总收入本月数
total_revenue_ay = total_revenue.values[0][1] # 营业总收入本年累计
orignal_data[\'total_revenue_nm\'].append(total_revenue_nm)
orignal_data[\'total_revenue_ay\'].append(total_revenue_ay)
保存文件
df = pd.DataFrame(data=orignal_data)
writer = pd.ExcelWriter(out_ptah) # to_excel 追加读写
df.to_excel(excel_writer=writer, sheet_name=\"orignal_data\", index=True)
writer.save()
writer.close()
def get_quarter(date):
month = int(str(date)[4:])
if month <= 3:
quarter = 1
elif 4 <= month <= 6:
quarter = 2
elif 7 <= month <= 9:
quarter = 3
else:
quarter = 4
return quarter