import pandas as pd
# Create a MultiIndex and select data by its index levels
# Build a two-level row index: 'cik' is the outer level, 'year' the inner one.
data_target = data_target.set_index(['cik', 'year'])

# Pull out a single level of the MultiIndex as an array. There is one entry per
# row, so a cik value appears once for every row it labels.
cik_list = data_target.index.get_level_values('cik').values

# Select subsets of the data by index label.
# NOTE(review): the selections below assume `data` carries the same
# ('cik', 'year') index built above -- confirm against where `data` is defined.
# NOTE(review): the variable names say 1996 but the label looked up is 1995;
# kept as written -- confirm which year is intended.

# A bare scalar passed to .loc is matched against the OUTER level (cik), so a
# lookup on the inner level has to name that level explicitly.
data_1996 = data.xs(1995, level='year')
# cik is the outer level, so a plain .loc lookup is enough here.
data_cik_3766 = data.loc[3766]
# A full key is a tuple given in level order: (cik, year).
data_1996_cik_3766 = data.loc[(3766, 1995)]
# In a pandas DataFrame we can remove rows whose index label is duplicated
# Index.duplicated only returns a boolean mask (True where an index label has
# already been seen); it removes nothing by itself. Filter with the inverted
# mask to actually drop the repeated rows.
# keep='first' retains the first occurrence of each label; keep='last' retains
# the last one, and keep=False marks every occurrence of a repeated label.
is_repeated = data.index.duplicated(keep='first')
data = data[~is_repeated]