目录

Lora/DB模型云端训练(适配Kaggle云平台) - Training GUI 秋叶版

创建时间:2023年11月11日 上次更新:2024年1月5日

本项目旨在降低模型训练的门槛,帮助更多人不花钱就能炼出自己的模型

使用教程见专栏:https://www.bilibili.com/read/cv28372468/?jump_opus=1

  • Lora训练GUI使用了秋叶发布的保姆式LoRA模型一键包文件,对新人友好

  • Kaggle为世界最大的数据平台,提供了免费显卡资源。

  • 经测试,此项目可在Android手机上Edge浏览器运行,但上传训练集步骤可能存在困难,建议PC端运行。

  • 新版训练GUI现已支持训练DB(DreamBooth)大模型,经测试可正常训练。

    作者:bilibili NYAN9。本源码免费公开分享,若发现有人倒卖,请立即向作者举报。

第一步: 填写你的训练集路径(右侧Add Data上传,将图片或者训练集传上去)

https://wd-jishu.oss-cn-hangzhou.aliyuncs.com/img/image-20240206235931917.png@!full

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
import os
import shutil
import socket
import threading
# Dataset folders (containing the training images) to copy; several may be listed.
# Each path must begin and end with "/" so that the images themselves are copied
# rather than the directory.
训练集路径 = ['/kaggle/input/wei-111/']
# Destination folder inside the trainer. Per the original note the last path
# component is "<repeat count>_<trigger word>" (here: 10 repeats, trigger word "boy").
目标路径 = '/kaggle/lora-scripts/train/aki/10_boy'

#下面不要改
def copy():
    try:
        !rm -rf /kaggle/lora-scripts/train/* 

        for 路径 in 训练集路径:
            shutil.copytree(路径, os.path.join(目标路径, os.path.basename(路径)))

            print(f"成功将训练集复制到 '{目标路径}'")
    except Exception as e:
        print(f"没检测到数据集路径,请确保数据集路径是否正确{e}")
        print(f"Datasets path not found, please check if the path correct")
        raise SystemExit
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#查看是什么GPU
import asyncio
import torch
import subprocess
import os
import concurrent.futures
import subprocess
# Progress notices shown before the setup work begins.
print('正在安装训练脚本,请等待600秒左右')
print("现在使用多线程安装,只需150秒即可")
# Fetch the training scripts into /kaggle/lora-scripts.
!cd /kaggle && git clone https://github.com/KaggleSD/lora-scripts
def prepare():
    !apt-get update & pip3 install imjoy-elfinder
    # 安装 aria2 和 python3pip
    !echo 安装aria2下载器
    !apt install -y aria2 && 完成
   

    # 定义一个空文件路径
    devnull = open(os.devnull, 'w')

    # 设置标准输出和标准错误流都重定向到空文件
    command = "imjoy-elfinder --root-dir=/kaggle --port=8076"
    process = subprocess.Popen(command, shell=True, stdout=devnull, stderr=subprocess.STDOUT)
    devnull.close()
    print('显卡信息如下:')
    !nvidia-smi
    # 检查显卡
    print('CUDA版本:',torch.version.cuda)
    print('Pytorch版本:',torch.__version__)
    print('显卡是否可用:','可用' if(torch.cuda.is_available()) else '不可用')
    print('显卡数量:',torch.cuda.device_count())
    print('是否支持BF16数字格式:','支持' if (torch.cuda.is_bf16_supported()) else '不支持')
    print('当前显卡型号:',torch.cuda.get_device_name())
    print('当前显卡的CUDA算力:',torch.cuda.get_device_capability())
    print('当前显卡的总显存:',torch.cuda.get_device_properties(0).total_memory/1024/1024/1024,'GB')
    print('是否支持TensorCore:','支持' if (torch.cuda.get_device_properties(0).major >= 7) else '不支持')
    print('当前显卡的显存使用率:',torch.cuda.memory_allocated(0)/torch.cuda.get_device_properties(0).total_memory*100,'%')
    #安装解压软件和下载存储工具
## 一、基础依赖和训练包拉取 (不用改这里)
## 训练包版本为2023年10月份1.7.0最新版,不定期同步最新版
def install1():
    """Change into /kaggle and print the status messages of the package step.

    Every clone, download, unzip and mkdir command below is commented out, so
    the only statements that run are the directory change and the two messages.
    """
    #!sudo rm -r /kaggle/lora-scripts/
    %cd  /kaggle/
    #!git clone https://gitee.com/nyan9/lora-scripts_1.git
    #!mv lora-scripts_1 lora-scripts
    
    print('下载预训练模型,大小1.2G')
    #!mkdir -p /kaggle/lora-scripts/huggingface/
    #!cd /kaggle/lora-scripts/huggingface/ && aria2c -x 16 -s 16 -k 1M -c https://liblibai-online.vibrou.com/web/model/c52a9848ee316e16b59a0fe17e17edc4c767f97f652b439d9da758d6077160ae.ckpt  -o 1.zip
    #!unzip -o /kaggle/lora-scripts/huggingface/1.zip -d /kaggle/lora-scripts/huggingface/ && echo 解压成功
    #!sudo rm -r /kaggle/lora-scripts/huggingface/1.zip && echo 删除压缩包成功
    #!mkdir -p /kaggle/lora-scripts/train/aki
    #!mkdir -p /kaggle/lora-scripts/train/reg
    !echo 训练包拉取成功
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
'''
### 说明:
# 由于Kaggle没有自带内网穿透,无法访问GUI和文件管理浏览器,所以这里使用了本人的公益Frp内网穿透。才得以访问训练GUI
'''
import subprocess
import shutil
import os
import time
import random

# Switch and config path that install() hands to install_Frpc.
use_frpc = True
frpconfigfile = '/kaggle/input/aihua-5000/frpc_10669839.ini'
# Random 4-digit suffix that is appended to the proxy section name below.
random_number = random.randint(1000, 9999)
# frpc configuration template: a TCP proxy for local port 28000.
config = """
[common]
server_addr = frp.freefrp.net
server_port = 7000
token = freefrp.net

[28000loratrainUI]
type = tcp
local_ip = 127.0.0.1
local_port = 28000
# remote_port = 17861

"""
config_with_random_number = config.replace('[28000loratrainUI]', f'[28000loratrainUI_{random_number}]')
# Where the generated config is saved
file_path = '/kaggle/working/cyanfrp.ini'

# Write the config contents to the file
with open(file_path, 'w') as config_file:
    config_file.write(config_with_random_number)



def install_Frpc(port, frpconfigfile, use_frpc):
    if use_frpc:
        #!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.sukaka.top/datasets/ACCA225/LoCyanfrp/resolve/main/frpc -d ./frpc -o frpc
        !cp /kaggle/input/net-tools/frpc /kaggle
        subprocess.run(['chmod', '+x', '/kaggle/frpc'], check=True)
        print(f'正在启动frp,端口{port}')
        # 将frp的输出重定向到文件
        with open('/kaggle/frp.log', 'w') as frp_output_file:
            subprocess.Popen(['/kaggle/frpc', '-c', '/kaggle/working/cyanfrp.ini'], stdout=frp_output_file, stderr=subprocess.STDOUT)
        !sleep 5
        !cat /kaggle/frp.log
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

def extract_ip_with_port_from_log(log_file):
    """Return every `a.b.c.d:port` string found in a log file.

    Args:
        log_file: Path of the text file to scan.

    Returns:
        A list of the matched strings in order of appearance; empty when the
        file contains none.
    """
    print('测试')
    matcher = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}:[0-9]+\b')
    with open(log_file, 'r') as handle:
        text = handle.read()
    return matcher.findall(text)

def iframe_thread_1(port):
    """Wait for a local port to open, copy the dataset, then print the GUI address.

    The addresses are read from /kaggle/frp.log when the function starts.

    Args:
        port: TCP port on 127.0.0.1 that is polled every 0.5 seconds.
    """
    ip_addresses_with_port = extract_ip_with_port_from_log('/kaggle/frp.log')
    while True:
        time.sleep(0.5)
        # The context manager closes the probe socket on every path, including
        # the successful connection that ends the loop.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if sock.connect_ex(('127.0.0.1', port)) == 0:
                break

    # These steps used to sit inside the polling loop after the `break`, so
    # they repeated while the port was closed and never ran once it opened.
    time.sleep(18)
    copy()
    print("Lora训练GUI的IP地址:", ip_addresses_with_port)


def extract_ip_with_port_from_log2(log_file):
    """Return every `a.b.c.d:port` string found in a log file.

    Args:
        log_file: Path of the text file to scan.

    Returns:
        A list of the matched strings in order of appearance; empty when the
        file contains none.
    """
    matcher = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}:[0-9]+\b')
    with open(log_file, 'r') as handle:
        text = handle.read()
    return matcher.findall(text)

def iframe_thread_2(port):
    """Wait for a local port to open, then print the file-manager address.

    The addresses are read from /kaggle/frp2.log when the function starts.

    Args:
        port: TCP port on 127.0.0.1 that is polled every 0.5 seconds.
    """
    ip_addresses_with_port2 = extract_ip_with_port_from_log('/kaggle/frp2.log')
    while True:
        time.sleep(0.5)
        # The context manager closes the probe socket on every path, including
        # the successful connection that ends the loop.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if sock.connect_ex(('127.0.0.1', port)) == 0:
                break

    # These steps used to sit inside the polling loop after the `break`, so
    # they repeated while the port was closed and never ran once it opened.
    time.sleep(22)
    print("文件管理器地址:", ip_addresses_with_port2)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
'''
### 说明:
# 由于Kaggle没有自带内网穿透,无法访问GUI和文件管理浏览器,所以这里使用了本人的公益Frp内网穿透。才得以访问文件浏览器
'''


# 关闭文件描述符

import subprocess
import shutil
import os
import time

# Switch and config path for the file-manager tunnel.
use_frpc1 = True
frpconfigfile1 = '/kaggle/input/aihua-5000/frpc_10669839.ini'
import random

# Generate a random 4-digit number to avoid port conflicts (per the original
# note); it is appended to the proxy section name below.
random_number = random.randint(1000, 9999)

# frpc configuration template: a TCP proxy for local port 8076.
config1 = """
[common]
server_addr = frp.freefrp.net
server_port = 7000
token = freefrp.net

[28000fileUI]
type = tcp
local_ip = 127.0.0.1
local_port = 8076
# remote_port = 17861
"""

config1_with_random_number = config1.replace('[28000fileUI]', f'[28000fileUI_{random_number}]')

# Where the generated config is saved
file_path = '/kaggle/working/nyanfrp.ini'

# Write the config contents to the file
with open(file_path, 'w') as config_file:
    config_file.write(config1_with_random_number)


def install_Frpc1(port, frpconfigfile1, use_frpc1):
    if use_frpc1:
        #!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.sukaka.top/datasets/ACCA225/LoCyanfrp/resolve/main/frpc -d ./frpc -o frpc
        !cp /kaggle/input/net-tools/frpc /kaggle
        subprocess.run(['chmod', '+x', '/kaggle/frpc'], check=True)
        print(f'正在启动frp,端口{port}')
        # 将frp的输出重定向到文件
        with open('/kaggle/frp2.log', 'w') as frp_output_file:
            subprocess.Popen(['/kaggle/frpc', '-c', '/kaggle/working/nyanfrp.ini'], stdout=frp_output_file, stderr=subprocess.STDOUT)
        !sleep 5
        !cat /kaggle/frp2.log

#install_Frpc1('8076', frpconfigfile1, use_frpc1)

第二步:训练底模下载

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def model():
    """Download the base model used for training with aria2c.

    The URL, the saved file name and the target folder are the three local
    settings below.
    """
    !mkdir -p /kaggle/lora-scripts/sd-models/
    模型链接 = "https://huggingface.co/Lykon/AnyLoRA/resolve/main/AnyLoRA_noVae_fp16-pruned.safetensors" # put the model download URL inside the quotes
    模型命名及后缀="model.safetensors" # put the file name (with extension) inside the quotes
    下载路径 = "/kaggle/lora-scripts/sd-models/"  # put the download folder inside the quotes
    print(f'下载训练底模 {模型命名及后缀}')
    # The success message is echoed only when the preceding commands succeed.
    !cd "{下载路径}" && aria2c --console-log-level=error -x 16 -s 16 -k 1M -c "{模型链接}" -o "{模型命名及后缀}" && echo 下载成功
#-----------------------------------------------下面别改-----------------------------------
# Folder the venv is extracted under, and the archive that contains it.
install_path2 = '/kaggle/opt/conda/envs/'
Venvpath = '/kaggle/input/lolidreambooth/kaggle/working/venv.tar.bak' 
def venv_install():
    """Extract the packaged Python venv archive, if the archive is present.

    Does nothing when `Venvpath` does not exist.
    """
    if os.path.exists(Venvpath):
        # NOTE(review): this checks /kaggle/working/opt while the archive is
        # extracted under install_path2 (/kaggle/opt/...) — confirm which
        # location is intended.
        if os.path.exists('/kaggle/working/opt'):
            !source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
        else:
            os.makedirs(install_path2, exist_ok=True)
            %cd {install_path2}
            !mkdir venv
            print('安装VENV环境')
            !tar -xf {Venvpath} -C {install_path2}venv
                #!source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
                    #安装依赖
    

训练VAE下载

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Download the VAE (needed for SDXL training)
是否下载sdxl_vae = False
def vae():
    """Download the SDXL VAE with aria2c when `是否下载sdxl_vae` is true; otherwise do nothing."""
    if 是否下载sdxl_vae:
        !mkdir -p /kaggle/lora-scripts/sd-models/vae
        模型链接2 = "https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/resolve/main/sdxl_vae.safetensors" # put the model download URL inside the quotes
        模型命名及后缀2="sdxl_vae.safetensors" # put the file name (with extension) inside the quotes
        下载路径2 = "/kaggle/lora-scripts/sd-models/vae"  # put the download folder inside the quotes
        print(f'下载训练底模 {模型命名及后缀2}')
        # The success message is echoed only when the preceding commands succeed.
        !cd "{下载路径2}" && aria2c --console-log-level=error -x 16 -s 16 -k 1M -c "{模型链接2}" -o "{模型命名及后缀2}" && echo 下载成功
        #安装依赖

[ ]:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import concurrent.futures
'''
执行函数
'''
import time
def install():
    try:
        start_time = time.time()
        # 启动一个线程执行venv_install()
        venv_install_thread = threading.Thread(target=venv_install)
        venv_install_thread.start()
        prepare_thread = threading.Thread(target=prepare)
        prepare_thread.start()
        prepare_thread.join()
        install1_thread = threading.Thread(target=install1)
        install1_thread.start()
        vae()
        model()
        install_Frpc('28000', frpconfigfile, use_frpc)
        install_Frpc1('8076', frpconfigfile, use_frpc)
        !apt-get install -y python3-pip
        !apt-get install -y libfuse-dev
        print("等待Python环境安装完成")
        venv_install_thread.join()
        # 计算总耗时
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"加载耗时: {elapsed_time} 秒")
    except Exception as e:
        print('ERROR!!由于你自身迷惑操作导致发生未知错误')

自动上传模型到Huggingface (可选)

防止Kaggle存储不够导致训练失败。Huggingface有无限存储。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
## 启动秋葉aaaki的一键式lora训练界面
# Master switch for the upload watcher: compress_images does nothing while this is False.
upload = False
import re
import asyncio
# Helper functions: clean up, pack and upload
from pathlib import Path
from huggingface_hub import HfApi, login
directory_path = '/kaggle/input/mohemodel'
install_path="/kaggle/working" 
# Use Hugging Face to save and load the webui configuration files
huggingface_use = True
huggingface_token_file = '/kaggle/input/tenkens/hugfacetoken.txt'
huggiingface_repo_id = 'ACCC1380/private-model'
# NOTE(review): the access token is hard-coded in the notebook; consider
# loading it from a private secret instead.
hugToken = 'hf_JZFqkANVBeq**DrDtSCARGoWlIcFY'
# hugface_upload only uploads while this is True.
uploadhf = True
repo_id = 'ACCC1380/private-model'
import os
import time
from pathlib import Path
import re
import asyncio
from huggingface_hub import HfApi, login

directory = '/kaggle/working' # directory that is searched for new model files
yun_files = []  # Define yun_files outside the try block

def _find_model_files(directory):
    """Return the set of .safetensors/.ckpt file paths found under `directory`."""
    return {
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if name.endswith(('.safetensors', '.ckpt'))
    }


def compress_images(directory, huggingface_token_file, repo_id):
    """Watch a directory for new model files, upload them, then delete them locally.

    Does nothing unless the notebook-level `upload` flag is true. Files that
    already exist when the watch starts are ignored. The loop ends on the
    first error, which is printed.

    Args:
        directory: Root folder that is scanned recursively.
        huggingface_token_file: Passed through to `hugface_upload`.
        repo_id: Passed through to `hugface_upload`.
    """
    if not upload:
        return

    known_files = _find_model_files(directory)
    while True:
        try:
            time.sleep(0.1)
            current_files = _find_model_files(directory)
            new_files = current_files - known_files
            if new_files:
                print("New files detected:")
                pending = list(new_files)
                # Short pause before uploading, kept from the original.
                time.sleep(2)
                print(pending)
                # hugface_upload iterates over the list itself; calling it once
                # per entry uploaded every file len(pending) times.
                hugface_upload(huggingface_token_file, pending, repo_id)
                for uploaded_file in pending:
                    os.remove(uploaded_file)
                # The uploaded files are gone now, so they must not stay in the
                # known set — otherwise a file written again under the same
                # path would never be noticed.
                known_files = current_files - new_files
        except Exception as e:
            print(f"发生错误: {e}")
            break  # Terminate the loop on error

def hugface_upload(huggingface_token_file, yun_files, repo_id):
    """Upload each existing file in `yun_files` to a Hugging Face dataset repo.

    Uses the notebook-level `uploadhf` switch and `hugToken` token. Problems
    are reported by printing; nothing is returned.

    Args:
        huggingface_token_file: Not read by this function.
        yun_files: Iterable of local file paths; each path is also used as the
            path inside the repo.
        repo_id: Identifier of the target repository.
    """
    if not uploadhf:
        # This branch used to print an unrelated "file does not exist" message.
        print('Error: Hugging Face upload is disabled (uploadhf is False)')
        return
    if hugToken == '':
        print(f'Error: Hugging Face token is empty')
        return

    # Log in with the access token, then upload through HfApi.
    login(token=hugToken)
    api = HfApi()
    print("HfApi class instantiated")
    print("Starting file upload...")
    for yun_file in yun_files:
        if not Path(yun_file).exists():
            print(f'Error: File {yun_file} does not exist')
            continue
        response = api.upload_file(
            path_or_fileobj=yun_file,
            path_in_repo=yun_file,
            repo_id=repo_id,
            repo_type="dataset"
        )
        print("File upload completed")
        print(f"Response: {response}")

        
#hugface_upload(huggingface_token_file,yun_files,huggiingface_repo_id)

# 请不要单独执行代码块
def start():
    """Start the two address-announcing watcher threads and run the training GUI."""
    # The result is not used below; the call still reads the log and prints.
    ip_addresses_with_port = extract_ip_with_port_from_log('/kaggle/frp.log')
    # Both watchers are given port 28000.
    # NOTE(review): prepare() starts the file manager on port 8076 — confirm
    # whether 28000 is intended for the second watcher.
    threading.Thread(target=iframe_thread_1, daemon=True, args=(28000,)).start()
    threading.Thread(target=iframe_thread_2, daemon=True, args=(28000,)).start()
    %cd /kaggle/lora-scripts    
    !df -h
    # NOTE(review): presumably each "!" line runs in its own shell, so this
    # `source` would not affect the next command — confirm.
    !source /kaggle/opt/conda/envs/venv/kaggle/working/venv/bin/python3/activate venv
    # Run the GUI with the venv's interpreter.
    !/kaggle/opt/conda/envs/venv/kaggle/working/venv/bin/python3 gui.py

# Run the setup pipeline when this cell executes.
install()

[ ]:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
import concurrent.futures
'''
执行函数,请勿单独执行代码块
'''
if __name__ == "__main__":
    # Run the GUI launcher and the upload watcher side by side.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
    try:
        future1 = executor.submit(start)
        future2 = executor.submit(compress_images, directory, huggingface_token_file, repo_id)
        concurrent.futures.wait([future1, future2])
        # wait() never re-raises, so a failed worker would otherwise go
        # unnoticed; report each failure explicitly.
        for label, future in (('start', future1), ('compress_images', future2)):
            error = future.exception()
            if error is not None:
                print(f'{label} failed: {error!r}')
    finally:
        # Release the worker threads even if something above raised.
        executor.shutdown()

使用说明 (必看)

https://wd-jishu.oss-cn-hangzhou.aliyuncs.com/img/image-20240207000131829.png@!full

https://wd-jishu.oss-cn-hangzhou.aliyuncs.com/img/6daeffceb55bc84c87667049562b292cc138655c9ac66481a7e73b5ae0318793.png@!full https://wd-jishu.oss-cn-hangzhou.aliyuncs.com/img/aNoLTF61mqBMRrV.png@!full

https://wd-jishu.oss-cn-hangzhou.aliyuncs.com/img/bf25e3079050e174d28dc3f63c845868931c9c3382ac1ef0db7cde18820f0743.png@!full

关于Kaggle服务器免费提供的配置如下:

| 内存 | 显卡 | CPU | 存储 |
| --- | --- | --- | --- |
| 29GB | T4 双卡 | Xeon | 系统盘 73.1G |
| Linux | 15GB x2 | 4核 | 数据盘(output) 19.5G |

常用训练底模链接:

SDXL: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0_0.9vae.safetensors

SD 1.5: https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors

anime_pruned_full:https://huggingface.co/Zerro0/animefull-final-pruned/resolve/main/model.ckpt?download=true

kohaku_XL v7: https://civitai.com/api/download/models/203416

更新历史:

前身:2023年7月份创建的kohya_ss的Kaggle训练脚本,但使用麻烦,无UI界面。无法在线打标。很多人不会用

  • 2023年11月10日:

  • 2023年11月11日:



 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
import os
# Folder the venv is extracted under, and the archive that contains it.
install_path2 = '/kaggle/working/opt/conda/envs/'
Venvpath = '/kaggle/input/sd-webui-venv/venv.tar.bak' 
def venv_install():
    """Extract the packaged Python venv archive unless /kaggle/working/opt already exists.

    Does nothing when `Venvpath` does not exist.
    """
    if os.path.exists(Venvpath):
        if os.path.exists('/kaggle/working/opt'):
            # NOTE(review): presumably each "!" line runs in its own shell, so
            # this `source` would not persist — confirm.
            !source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
        else:
            os.makedirs(install_path2, exist_ok=True)
            %cd {install_path2}
            !mkdir venv
            print('安装VENV环境')
            !tar -xf {Venvpath} -C {install_path2}venv
                #!source /kaggle/working/opt/conda/envs/venv/bin/activate venv
            print('环境安装完毕')
                    # install dependencies
# Run the extraction when this cell executes.
venv_install()

[ ]:

1
# Print the version of the Python interpreter inside the extracted venv.
!/kaggle/working/opt/conda/envs/venv/bin/python -V

随机文章