kaggle竞赛 宠物受欢迎程度baseline方案代码与解析
PetFinder.my - Pawpularity Contest baseline
宠物受欢迎程度分析
评价指标:RMSE。前传:kaggle竞赛-宠物受欢迎程度(赛题讲解与数据分析)
数据集路径:PetFinder.my - Pawpularity Contest / Swin Transformer / timm / baseline
import sys
import gc

# The offline copy of timm must be on sys.path before it is imported below.
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

from timm import create_model
from fastai.vision.all import *

# Seed via fastai's set_seed before anything random happens.
set_seed(365, reproducible=True)

# Batch size shared by every DataLoaders built in this file.
BATCH_SIZE = 32

# --- data ---
# NOTE(review): `dataset_path` is used throughout this file but is never
# defined in the visible source. The usual Kaggle mount point for this
# competition is assumed here -- confirm against the attached data.
dataset_path = Path('../input/petfinder-pawpularity-score')

# Load the training table and preview its first rows.
train_df = pd.read_csv(dataset_path / 'train.csv')
train_df.head()
# Turn every image Id into the path of its JPEG under <dataset_path>/train.
train_df['path'] = [
    f"{dataset_path / 'train' / image_id}.jpg" for image_id in train_df['Id']
]

# Drop the Id column, shuffle the rows, and renumber the index from 0.
train_df = (
    train_df
    .drop(columns=['Id'])
    .sample(frac=1)
    .reset_index(drop=True)
)
train_df.head()
import shutil

# Copy the offline Swin weights into the torch hub checkpoint directory.
# NOTE(review): presumably so that create_model(..., pretrained=True) finds
# them without network access -- confirm.
checkpoint_dir = '/root/.cache/torch/hub/checkpoints/'
weights_name = 'swin_large_patch4_window7_224_22kto1k.pth'

# exist_ok=True replaces the separate existence check (no check-then-create race).
os.makedirs(checkpoint_dir, exist_ok=True)

# shutil.copy is the plain-Python equivalent of the notebook-only `!cp`.
shutil.copy(
    os.path.join('../input/swin-transformer', weights_name),
    os.path.join(checkpoint_dir, weights_name),
)
随机种子设置
seed = 365

# Seed through fastai's helper and through torch directly.
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

# Fix: use_deterministic_algorithms is a function. Assigning True to it (as
# the original did) only overwrote the function and enabled nothing.
# warn_only=True makes ops without a deterministic implementation warn
# instead of raising, so training is not aborted by this switch.
torch.use_deterministic_algorithms(True, warn_only=True)

# --- Binning the target: how many bins should be used for a given dataset size? ---
import math

# Rice rule: number of bins = ceil(2 * n ** (1/3)), n = number of training rows.
num_bins = math.ceil(2 * len(train_df) ** (1. / 3))
num_bins
# Fix: 'norm_score' is read below, but the only other assignment to it appears
# much later in this file (alongside the test-data loading), so a top-to-bottom
# run would raise KeyError. Creating it here is idempotent.
train_df['norm_score'] = train_df['Pawpularity'] / 100

# Cut the normalised score into num_bins bins; labels=False stores the integer
# bin index rather than an interval label.
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)
train_df['bins'].hist()
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# -1 marks rows that have not been assigned to a fold yet.
train_df['fold'] = -1

N_FOLDS = 10  # 10-fold cross-validation
strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)

# Look the column up by name instead of assuming 'fold' is the last column.
fold_col = train_df.columns.get_loc('fold')

# split() yields (train indices, held-out indices). Each row is labelled with
# the fold in which it is held out, stratified on the score bins.
for i, (_, held_out_idx) in enumerate(strat_kfold.split(train_df.index, train_df['bins'])):
    train_df.iloc[held_out_idx, fold_col] = i

train_df['fold'] = train_df['fold'].astype('int')
train_df.fold.value_counts().plot.bar()
# Count how many fold-0 rows fall into each score bin.
train_df.loc[train_df['fold'] == 0, 'bins'].value_counts()
def petfinder_rmse(input, target):
    """Return 100 * RMSE between sigmoid(input) and target.

    Args:
        input: Tensor of raw model outputs; it is flattened to 1-D and
            passed through a sigmoid before comparison.
        target: Tensor of targets compared element-wise with the
            flattened, sigmoid-activated input.

    Returns:
        A scalar tensor: 100 * sqrt(mean squared error).
    """
    # torch.sigmoid replaces the deprecated F.sigmoid alias.
    return 100 * torch.sqrt(F.mse_loss(torch.sigmoid(input.flatten()), target))


# --- dataloading ---
def get_data(fold):
    """Build the fastai DataLoaders for one cross-validation fold.

    Rows of the module-level ``train_df`` whose ``fold`` column equals
    ``fold`` become the validation set; all other rows are used for training.

    Args:
        fold: Fold number to hold out for validation.

    Returns:
        The ``ImageDataLoaders`` built from ``train_df``.
    """
    # assign() returns a new frame, so the module-level train_df is untouched.
    fold_df = train_df.assign(is_valid=train_df['fold'] == fold)

    return ImageDataLoaders.from_df(
        fold_df,
        valid_col='is_valid',        # column flagging validation rows
        seed=365,
        fn_col='path',               # column holding the image path
        label_col='norm_score',      # column holding the regression target
        y_block=RegressionBlock,     # target type
        bs=BATCH_SIZE,
        num_workers=8,
        item_tfms=Resize(224),
        # Image augmentation applied per batch.
        batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]),
    )
# Validate the K-fold split size: the train and validation batch counts for
# fold 0 must add up to the number of whole batches in train_df.
the_data = get_data(0)
assert len(train_df) // BATCH_SIZE == (len(the_data.train) + len(the_data.valid))

# --- Model ---
def get_learner(fold_num):
    """Create the fp16 fastai Learner for one cross-validation fold.

    Args:
        fold_num: Fold number forwarded to ``get_data`` as the validation fold.

    Returns:
        A ``Learner`` wrapping a pretrained ``swin_large_patch4_window7_224``
        timm model, using ``BCEWithLogitsLossFlat`` as the loss and
        ``petfinder_rmse`` as the metric, converted with ``to_fp16()``.
    """
    dls = get_data(fold_num)
    net = create_model(
        'swin_large_patch4_window7_224',
        pretrained=True,
        num_classes=dls.c,
    )
    return Learner(
        dls,
        net,
        loss_func=BCEWithLogitsLossFlat(),
        metrics=petfinder_rmse,
    ).to_fp16()


# --- test data ---
test_df = pd.read_csv(dataset_path / 'test.csv')
test_df.head()

# Build the image paths. The Pawpularity column is filled with the constant 1
# (a dummy value; presumably only so the test frame has a label column).
test_df['Pawpularity'] = [1] * len(test_df)
test_df['path'] = [
    f"{dataset_path / 'test' / image_id}.jpg" for image_id in test_df['Id']
]
test_df = test_df.drop(columns=['Id'])

# Training target rescaled by 1/100.
train_df['norm_score'] = train_df['Pawpularity'] / 100
# Run the learning-rate finder on the fold-0 learner. The call stays chained
# so no extra reference to the learner is kept afterwards.
(
    get_learner(fold_num=0)
    .lr_find(end_lr=3e-2)
)
training
# Train one model per fold and collect its test-set predictions.
all_preds = []

for i in range(N_FOLDS):
    print(f'Fold {i} results')

    learn = get_learner(fold_num=i)
    learn.fit_one_cycle(
        5,
        2e-5,
        cbs=[
            SaveModelCallback(),
            EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=2),
        ],
    )
    learn.recorder.plot_loss()

    # Build the test loader from the learner's own DataLoaders. The original
    # rebuilt a throwaway 80/20 random-split ImageDataLoaders with the same
    # transforms on every fold just to call test_dl on it.
    # NOTE(review): the removed construction was passed seed=365; if it
    # re-seeded the RNG before TTA, the augmentation draws may now differ.
    test_dl = learn.dls.test_dl(test_df)

    # Test-time augmentation predictions for this fold.
    preds, _ = learn.tta(dl=test_dl, n=5, beta=0)
    all_preds.append(preds)

    # Drop the model, collect garbage first, then release the cached GPU
    # memory so the blocks freed by the collector are returned as well.
    del learn
    gc.collect()
    torch.cuda.empty_cache()
# Inspect the raw per-fold predictions.
all_preds

# Mean prediction over all folds and images, scaled by 100.
# Fix: `all_preds*100` repeated the Python list 100 times, which stacked 100
# copies and left the mean unscaled; the scaling belongs on the result.
np.mean(np.stack(all_preds)) * 100
sample_df = pd.read_csv(dataset_path / 'sample_submission.csv')

# Average the per-fold predictions and scale by 100 before writing.
preds = np.stack(all_preds).mean(axis=0)
sample_df['Pawpularity'] = preds * 100
sample_df.to_csv('submission.csv', index=False)

# Read the written file back and preview it.
pd.read_csv('submission.csv').head()
相关知识
kaggle竞赛 宠物受欢迎程度baseline方案代码与解析
kaggle竞赛
【记第一次kaggle比赛】PetFinder.my
作弊翻车!Kaggle 大赛第一团队获最严处分
kaggle项目之宠物收养的速度预测
深度学习【Kaggle新赛】宠物预测大赛指导班(多模态)
Kaggle PetFinder.my
为了1万美元奖金,他作弊拿到Kaggle比赛第一名,之后跳去硅谷明星AI创业公司,现已被封号
4:宠物美容专业技能竞赛方案
实战Kaggle比赛:狗的品种识别(ImageNet Dogs)
网址: kaggle竞赛 宠物受欢迎程度baseline方案代码与解析 https://www.mcbbbk.com/newsview1111152.html
上一篇: 受宠物惊吓摔伤谁担责? |
下一篇: 《成为死对头的宠物后》季羡鱼 |
推荐分享

- 1我的狗老公李淑敏33——如何 5096
- 2南京宠物粮食薄荷饼宠物食品包 4363
- 3家养水獭多少钱一只正常 3825
- 4豆柴犬为什么不建议养?可爱的 3668
- 5自制狗狗辅食:棉花面纱犬的美 3615
- 6狗交配为什么会锁住?从狗狗生 3601
- 7广州哪里卖宠物猫狗的选择性多 3535
- 8湖南隆飞尔动物药业有限公司宠 3477
- 9黄金蟒的价格 3396
- 10益和 MATCHWELL 狗 3352