generated from xuyuqing/ailab
122 lines
3.4 KiB
Plaintext
122 lines
3.4 KiB
Plaintext
{
|
|
"output_dir": "test/ailabmodel/my_llama2_model_wenan_100epo",
|
|
"overwrite_output_dir": false,
|
|
"do_train": false,
|
|
"do_eval": true,
|
|
"do_predict": false,
|
|
"evaluation_strategy": "epoch",
|
|
"prediction_loss_only": false,
|
|
"per_device_train_batch_size": 4,
|
|
"per_device_eval_batch_size": 4,
|
|
"per_gpu_train_batch_size": null,
|
|
"per_gpu_eval_batch_size": null,
|
|
"gradient_accumulation_steps": 4,
|
|
"eval_accumulation_steps": null,
|
|
"eval_delay": 0,
|
|
"learning_rate": 5e-05,
|
|
"weight_decay": 0,
|
|
"adam_beta1": 0.9,
|
|
"adam_beta2": 0.999,
|
|
"adam_epsilon": 1e-08,
|
|
"max_grad_norm": 1.0,
|
|
"num_train_epochs": 20,
|
|
"max_steps": -1,
|
|
"lr_scheduler_type": "cosine",
|
|
"warmup_ratio": 0.03,
|
|
"warmup_steps": 0,
|
|
"log_level": "passive",
|
|
"log_level_replica": "warning",
|
|
"log_on_each_node": true,
|
|
"logging_dir": "test/ailabmodel/my_llama2_model_wenan_100epo/runs/Oct22_14-37-07_hu-ailab-10-101-3-63.atp.cn",
|
|
"logging_strategy": "steps",
|
|
"logging_first_step": false,
|
|
"logging_steps": 10,
|
|
"logging_nan_inf_filter": true,
|
|
"save_strategy": "steps",
|
|
"save_steps": 500,
|
|
"save_total_limit": 3,
|
|
"save_safetensors": false,
|
|
"save_on_each_node": false,
|
|
"no_cuda": false,
|
|
"use_mps_device": false,
|
|
"seed": 42,
|
|
"data_seed": null,
|
|
"jit_mode_eval": false,
|
|
"use_ipex": false,
|
|
"bf16": false,
|
|
"fp16": true,
|
|
"fp16_opt_level": "O1",
|
|
"half_precision_backend": "auto",
|
|
"bf16_full_eval": false,
|
|
"fp16_full_eval": false,
|
|
"tf32": null,
|
|
"local_rank": 0,
|
|
"ddp_backend": null,
|
|
"tpu_num_cores": null,
|
|
"tpu_metrics_debug": false,
|
|
"debug": [],
|
|
"dataloader_drop_last": false,
|
|
"eval_steps": 250,
|
|
"dataloader_num_workers": 0,
|
|
"past_index": -1,
|
|
"run_name": "test/ailabmodel/my_llama2_model_wenan_100epo",
|
|
"disable_tqdm": false,
|
|
"remove_unused_columns": true,
|
|
"label_names": null,
|
|
"load_best_model_at_end": false,
|
|
"metric_for_best_model": null,
|
|
"greater_is_better": null,
|
|
"ignore_data_skip": false,
|
|
"sharded_ddp": [],
|
|
"fsdp": [],
|
|
"fsdp_min_num_params": 0,
|
|
"fsdp_config": {
|
|
"fsdp_min_num_params": 0,
|
|
"xla": false,
|
|
"xla_fsdp_grad_ckpt": false
|
|
},
|
|
"fsdp_transformer_layer_cls_to_wrap": null,
|
|
"deepspeed": "/data1/cgzhang6/ailab_sdk/src/ailab/atp_finetuner/trainer/nlp/ds_zero2_no_offload.json",
|
|
"label_smoothing_factor": 0.0,
|
|
"optim": "adamw_torch",
|
|
"optim_args": null,
|
|
"adafactor": false,
|
|
"group_by_length": false,
|
|
"length_column_name": "length",
|
|
"report_to": [
|
|
"tensorboard"
|
|
],
|
|
"ddp_find_unused_parameters": false,
|
|
"ddp_bucket_cap_mb": null,
|
|
"dataloader_pin_memory": true,
|
|
"skip_memory_metrics": true,
|
|
"use_legacy_prediction_loop": false,
|
|
"push_to_hub": false,
|
|
"resume_from_checkpoint": true,
|
|
"hub_model_id": null,
|
|
"hub_strategy": "every_save",
|
|
"hub_token": "<HUB_TOKEN>",
|
|
"hub_private_repo": false,
|
|
"gradient_checkpointing": false,
|
|
"include_inputs_for_metrics": false,
|
|
"fp16_backend": "auto",
|
|
"push_to_hub_model_id": null,
|
|
"push_to_hub_organization": null,
|
|
"push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
|
|
"mp_parameters": "",
|
|
"auto_find_batch_size": false,
|
|
"full_determinism": false,
|
|
"torchdynamo": null,
|
|
"ray_scope": "last",
|
|
"ddp_timeout": 30000,
|
|
"torch_compile": false,
|
|
"torch_compile_backend": null,
|
|
"torch_compile_mode": null,
|
|
"xpu_backend": null,
|
|
"sortish_sampler": false,
|
|
"predict_with_generate": false,
|
|
"generation_max_length": 512,
|
|
"generation_num_beams": null,
|
|
"generation_config": null
|
|
}
|