目录

目录

Lora/DB模型云端训练(适配Kaggle云平台) - Training GUI 秋叶版

目录

Lora/DB模型云端训练(适配Kaggle云平台) - Training GUI 秋叶版

创建时间:2023年11月11日 上次更新:2024年1月5日

第一步: 填写你的训练集路径(右侧Add Data上传,将图片或者训练集传上去)

image-20240207001013373

import os
import shutil
import socket
import threading
# Dataset directories that contain the training images; several may be listed.
# Each path needs a leading and a trailing "/" so that the images are copied
# rather than the directory itself.
训练集路径 = ['/kaggle/input/wei-111/']
# Destination folder, named "<repeats>_<trigger word>": here 10 is the repeat
# count and "boy" is the trigger word.
目标路径 = '/kaggle/lora-scripts/train/aki/10_boy'

#下面不要改
def copy():
    try:
        !rm -rf /kaggle/lora-scripts/train/* 

        for 路径 in 训练集路径:
            shutil.copytree(路径, os.path.join(目标路径, os.path.basename(路径)))

            print(f"成功将训练集复制到 '{目标路径}'")
    except Exception as e:
        print(f"没检测到数据集路径,请确保数据集路径是否正确{e}")
        print(f"Datasets path not found, please check if the path correct")
        raise SystemExit
#查看是什么GPU
import asyncio
import torch
import subprocess
import os
import concurrent.futures
import subprocess
# Tell the user that the installation is starting.
print('正在安装训练脚本,请等待600秒左右')
print("现在使用多线程安装,只需150秒即可")
# Clone the lora-scripts repository into /kaggle (IPython shell escape).
!cd /kaggle && git clone https://github.com/KaggleSD/lora-scripts
def prepare():
    !apt-get update & pip3 install imjoy-elfinder
    # 安装 aria2 和 python3pip
    !echo 安装aria2下载器
    !apt install -y aria2 && 完成
   

    # 定义一个空文件路径
    devnull = open(os.devnull, 'w')

    # 设置标准输出和标准错误流都重定向到空文件
    command = "imjoy-elfinder --root-dir=/kaggle --port=8076"
    process = subprocess.Popen(command, shell=True, stdout=devnull, stderr=subprocess.STDOUT)
    devnull.close()
    print('显卡信息如下:')
    !nvidia-smi
    # 检查显卡
    print('CUDA版本:',torch.version.cuda)
    print('Pytorch版本:',torch.__version__)
    print('显卡是否可用:','可用' if(torch.cuda.is_available()) else '不可用')
    print('显卡数量:',torch.cuda.device_count())
    print('是否支持BF16数字格式:','支持' if (torch.cuda.is_bf16_supported()) else '不支持')
    print('当前显卡型号:',torch.cuda.get_device_name())
    print('当前显卡的CUDA算力:',torch.cuda.get_device_capability())
    print('当前显卡的总显存:',torch.cuda.get_device_properties(0).total_memory/1024/1024/1024,'GB')
    print('是否支持TensorCore:','支持' if (torch.cuda.get_device_properties(0).major >= 7) else '不支持')
    print('当前显卡的显存使用率:',torch.cuda.memory_allocated(0)/torch.cuda.get_device_properties(0).total_memory*100,'%')
    #安装解压软件和下载存储工具
## 一、基础依赖和训练包拉取 (不用改这里)
## 训练包版本为2023年10月份1.7.0最新版,不定期同步最新版
def install1():
    """Switch to /kaggle/ and print the status messages of the package step.

    Every download/unzip command below is commented out, so although the
    message announces a 1.2G pretrained-model download, this function only
    changes the working directory and prints/echoes text.
    """
    #!sudo rm -r /kaggle/lora-scripts/
    %cd  /kaggle/
    #!git clone https://gitee.com/nyan9/lora-scripts_1.git
    #!mv lora-scripts_1 lora-scripts
    
    print('下载预训练模型,大小1.2G')
    # Disabled: download, unpack and clean up the pretrained model archive,
    # and create the train/aki and train/reg folders.
    #!mkdir -p /kaggle/lora-scripts/huggingface/
    #!cd /kaggle/lora-scripts/huggingface/ && aria2c -x 16 -s 16 -k 1M -c https://liblibai-online.vibrou.com/web/model/c52a9848ee316e16b59a0fe17e17edc4c767f97f652b439d9da758d6077160ae.ckpt  -o 1.zip
    #!unzip -o /kaggle/lora-scripts/huggingface/1.zip -d /kaggle/lora-scripts/huggingface/ && echo 解压成功
    #!sudo rm -r /kaggle/lora-scripts/huggingface/1.zip && echo 删除压缩包成功
    #!mkdir -p /kaggle/lora-scripts/train/aki
    #!mkdir -p /kaggle/lora-scripts/train/reg
    !echo 训练包拉取成功

第二步:训练底模下载

def model():
    !mkdir -p /kaggle/lora-scripts/sd-models/
    模型链接 = "https://huggingface.co/Lykon/AnyLoRA/resolve/main/AnyLoRA_noVae_fp16-pruned.safetensors" #双引号内填模型下载地址
    模型命名及后缀="model.safetensors" #双引号内填模型命名及后缀
    下载路径 = "/kaggle/lora-scripts/sd-models/"  #双引号内填模型的下载路径
    print(f'下载训练底模 {模型命名及后缀}')
    !cd "{下载路径}" && aria2c --console-log-level=error -x 16 -s 16 -k 1M -c "{模型链接}" -o "{模型命名及后缀}" && echo 下载成功
#-----------------------------------------------下面别改-----------------------------------
install_path2 = '/kaggle/opt/conda/envs/'
Venvpath = '/kaggle/input/lolidreambooth/kaggle/working/venv.tar.bak' 
def venv_install():
    if os.path.exists(Venvpath):
        if os.path.exists('/kaggle/working/opt'):
            !source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
        else:
            os.makedirs(install_path2, exist_ok=True)
            %cd {install_path2}
            !mkdir venv
            print('安装VENV环境')
            !tar -xf {Venvpath} -C {install_path2}venv
                #!source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
                    #安装依赖
    

训练VAE下载

# 下载 VAE (SDXL训练需要用到)
是否下载sdxl_vae = False
def vae():
    if 是否下载sdxl_vae:
        !mkdir -p /kaggle/lora-scripts/sd-models/vae
        模型链接2 = "https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/resolve/main/sdxl_vae.safetensors" #双引号内填模型下载地址
        模型命名及后缀2="sdxl_vae.safetensors" #双引号内填模型命名及后缀
        下载路径2 = "/kaggle/lora-scripts/sd-models/vae"  #双引号内填模型的下载路径
        print(f'下载训练底模 {模型命名及后缀2}')
        !cd "{下载路径2}" && aria2c --console-log-level=error -x 16 -s 16 -k 1M -c "{模型链接2}" -o "{模型命名及后缀2}" && echo 下载成功
        #安装依赖
import concurrent.futures
'''
执行函数
'''
import time
def install():
    try:
        start_time = time.time()
        # 启动一个线程执行venv_install()
        venv_install_thread = threading.Thread(target=venv_install)
        venv_install_thread.start()
        prepare_thread = threading.Thread(target=prepare)
        prepare_thread.start()
        prepare_thread.join()
        install1_thread = threading.Thread(target=install1)
        install1_thread.start()
        vae()
        model()
        install_Frpc('28000', frpconfigfile, use_frpc)
        install_Frpc1('8076', frpconfigfile, use_frpc)
        !apt-get install -y python3-pip
        !apt-get install -y libfuse-dev
        print("等待Python环境安装完成")
        venv_install_thread.join()
        # 计算总耗时
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"加载耗时: {elapsed_time} 秒")
    except Exception as e:
        print('ERROR!!由于你自身迷惑操作导致发生未知错误')

自动上传模型到Huggingface (可选)

防止Kaggle存储不够导致训练失败。Huggingface有无限存储。

## 启动秋葉aaaki的一键式lora训练界面
# Master switch read by compress_images(): nothing is watched or uploaded
# while this is False.
upload = False
import re
import asyncio
# Helper functions: clean up, package and upload.
from pathlib import Path
from huggingface_hub import HfApi, login
directory_path = '/kaggle/input/mohemodel'
install_path="/kaggle/working" 
# Use Hugging Face to save and load the webui configuration files.
huggingface_use = True
# NOTE(review): this path is passed to the upload helpers, but the visible
# code never reads the file; the token below is used instead.
huggingface_token_file = '/kaggle/input/tenkens/hugfacetoken.txt'
# NOTE(review): "huggiingface" is misspelled; the only visible use is a
# commented-out call.
huggiingface_repo_id = 'ACCC1380/private-model'
# NOTE(review): access token hard-coded in the notebook; consider loading it
# from a secret instead.
hugToken = 'hf_JZFqkANVBeq**DrDtSCARGoWlIcFY'
# Switch read by hugface_upload(): uploads are skipped while this is False.
uploadhf = True
# Repository that new model files are uploaded to.
repo_id = 'ACCC1380/private-model'
import os
import time
from pathlib import Path
import re
import asyncio
from huggingface_hub import HfApi, login

directory = '/kaggle/working'  # directory that is searched for new model files
# Module-level list; compress_images() assigns its own local yun_files and
# does not modify this one.
yun_files = []

def _find_model_files(directory):
    """Return the set of .safetensors/.ckpt file paths found under *directory*."""
    found = set()
    for root, _, names in os.walk(directory):
        for name in names:
            if name.endswith(('.safetensors', '.ckpt')):
                found.add(os.path.join(root, name))
    return found


def compress_images(directory, huggingface_token_file, repo_id):
    """Watch *directory* for new model files, upload them, then delete them.

    Despite its name, no image compression happens here. When the
    module-level ``upload`` flag is false the function returns immediately.
    Otherwise it polls forever and only stops when an exception occurs.

    Args:
        directory: Root directory that is scanned recursively.
        huggingface_token_file: Passed through to hugface_upload().
        repo_id: Passed through to hugface_upload().
    """
    if not upload:
        return

    known_files = _find_model_files(directory)
    # hugface_upload() only prints a message and returns when uploading is
    # disabled or the token is empty. Deleting in that case would destroy the
    # model without a copy, so local files are removed only when an upload is
    # really attempted.
    can_upload = bool(uploadhf) and hugToken != ''

    while True:
        try:
            time.sleep(0.1)
            current_files = _find_model_files(directory)
            new_files = current_files - known_files
            if new_files:
                print("New files detected:")
                pending = sorted(new_files)
                # NOTE(review): presumably this pause gives the writer time
                # to finish the file; confirm that 2 seconds is enough.
                time.sleep(2)
                print(pending)
                # Upload the batch once. The old loop re-uploaded the whole
                # list once per new file.
                hugface_upload(huggingface_token_file, pending, repo_id)
                if can_upload:
                    for uploaded_file in pending:
                        os.remove(uploaded_file)
                known_files = current_files
        except Exception as e:
            print(f"发生错误: {e}")
            # Stop watching after the first error.
            break

def hugface_upload(huggingface_token_file, yun_files, repo_id):
    """Upload files to a Hugging Face dataset repository.

    Uploading is skipped, with a printed message, when the module-level
    ``uploadhf`` flag is false or ``hugToken`` is empty.

    Args:
        huggingface_token_file: Not read by this function; the token comes
            from the module-level ``hugToken``.
        yun_files: Paths of the files to upload. Each path is also used as
            the path inside the repository.
        repo_id: Target repository id.
    """
    if not uploadhf:
        # This branch used to claim that the token file did not exist.
        print('Error: Hugging Face upload is disabled (uploadhf is False)')
        return
    if hugToken == '':
        print('Error: Hugging Face token is empty')
        return

    # Log in with the access token and create the API client.
    login(token=hugToken)
    api = HfApi()
    print("HfApi class instantiated")
    print("Starting file upload...")
    for yun_file in yun_files:
        if not Path(yun_file).exists():
            print(f'Error: File {yun_file} does not exist')
            continue
        response = api.upload_file(
            path_or_fileobj=yun_file,
            path_in_repo=yun_file,
            repo_id=repo_id,
            repo_type="dataset"
        )
        print("File upload completed")
        print(f"Response: {response}")

        
#hugface_upload(huggingface_token_file,yun_files,huggiingface_repo_id)

# 请不要单独执行代码块
def start():
    ip_addresses_with_port = extract_ip_with_port_from_log('/kaggle/frp.log')
    threading.Thread(target=iframe_thread_1, daemon=True, args=(28000,)).start()
    threading.Thread(target=iframe_thread_2, daemon=True, args=(28000,)).start()
    %cd /kaggle/lora-scripts    
    !df -h
    !source /kaggle/opt/conda/envs/venv/kaggle/working/venv/bin/python3/activate venv
    !/kaggle/opt/conda/envs/venv/kaggle/working/venv/bin/python3 gui.py

# Run the whole setup (install() defined earlier in this notebook).
install()
import concurrent.futures
'''
执行函数,请勿单独执行代码块
'''
if __name__ == "__main__":
    # Run the GUI launcher and the upload watcher side by side. The `with`
    # block shuts the executor down once both have finished.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        tasks = {
            executor.submit(start): 'start',
            executor.submit(compress_images, directory, huggingface_token_file, repo_id): 'compress_images',
        }
        concurrent.futures.wait(tasks)
    # An exception raised inside a future stays hidden unless it is asked
    # for, so report failures explicitly.
    for future, task_name in tasks.items():
        error = future.exception()
        if error is not None:
            print(f'{task_name} failed: {type(error).__name__}: {error}')

使用说明 (必看)

Image Image

Image

add Codeadd Markdown

关于Kaggle服务器免费提供的配置如下:

add Codeadd Markdown

| 内存 | 显卡 | CPU | 存储 |
| --- | --- | --- | --- |
| 29GB | T4双卡 | Xeon | 系统盘 73.1G |
| Linux | 15GB x2 | 4核 | 数据盘(output) 19.5G |

add Codeadd Markdown

常用训练底模链接:

add Codeadd Markdown

SDXL: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0_0.9vae.safetensors

SD 1.5: https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors

anime_pruned_full:https://huggingface.co/Zerro0/animefull-final-pruned/resolve/main/model.ckpt?download=true

kohaku_XL v7: https://civitai.com/api/download/models/203416

add Codeadd Markdown

更新历史:

前身:2023年7月份创建的kohya_ss的Kaggle训练脚本。但它使用麻烦,没有UI界面,无法在线打标,很多人不会用。

  • 2023年11月10日:

  • 2023年11月11日:

import os
install_path2 = '/kaggle/working/opt/conda/envs/'
Venvpath = '/kaggle/input/sd-webui-venv/venv.tar.bak' 
def venv_install():
    if os.path.exists(Venvpath):
        if os.path.exists('/kaggle/working/opt'):
            !source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
        else:
            os.makedirs(install_path2, exist_ok=True)
            %cd {install_path2}
            !mkdir venv
            print('安装VENV环境')
            !tar -xf {Venvpath} -C {install_path2}venv
                #!source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
                    #安装依赖
venv_install()

# Print the version of the Python interpreter inside the unpacked venv.
!/kaggle/working/opt/conda/envs/venv/bin/python -V

相关内容