Skip to content

Instantly share code, notes, and snippets.

@kdrkdrkdr
Created September 1, 2025 08:24
Show Gist options
  • Save kdrkdrkdr/2473a8ca3618b5fbd28fd209a6947405 to your computer and use it in GitHub Desktop.
Save kdrkdrkdr/2473a8ca3618b5fbd28fd209a6947405 to your computer and use it in GitHub Desktop.
환경 체커톤 대회 엑셀 파싱
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
df = pd.ExcelFile("UTF82024 국민환경의식 설문조사_원자료.xlsx", engine='openpyxl')
src = df.parse('원자료')
cb = df.parse('코드북')
# 임농업 데이터만
data = src[src['F2'] == 1]
for codenum in ['A', 'B', 'C', 'D']:
a_indices = cb[cb['변수명'].astype(str).str.startswith(codenum)].index.tolist()
# 각 A 문항별로 처리
for i, idx in enumerate(a_indices):
var_name = cb.iloc[idx]['변수명']
var_desc = cb.iloc[idx, 2] if len(cb.columns) > 2 else "" # C열: 변수 설명
if var_name in data.columns:
print(f"\n{'='*60}")
print(f"{var_name}: {var_desc}")
print(f"{'='*60}")
next_idx = a_indices[i+1] if i+1 < len(a_indices) else len(cb)
options_dict = {}
for j in range(idx, next_idx):
d_value = cb.iloc[j, 3] if len(cb.columns) > 3 else None # D열
if pd.notna(d_value):
d_str = str(d_value).strip()
if d_str and '.' in d_str:
parts = d_str.split('.', 1)
try:
num = int(parts[0].strip())
text = parts[1].strip()
options_dict[num] = text
except:
pass
# 응답 분포
value_counts = data[var_name].value_counts()
# 모든 선택지에 대해 출력 (없는 값은 0으로)
for num in sorted(options_dict.keys()):
count = value_counts.get(num, 0) # 없으면 0
percentage = (count / len(data)) * 100 if len(data) > 0 else 0
print(f" {num}. {options_dict[num]}: {count}명 ({percentage:.1f}%)")
print(f"\n 총 응답자: {value_counts.sum()}명")
print('-------------------')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment