Skip to content

原理

js将大文件分成多份,全部上传成功之后,调用合并接口合成文件。如果传输中断,下次上传的时候过滤掉已经上传成功的分片,将剩余的分片上传,成功之后合并文件。

文件切片

首先我们要将大文件进行切片,大致的步骤如下:

  • 提供默认切片大小,根据文件大小计算该文件一共有多少片
  • 利用spark-md5创建一个计算ArrayBuffer的对象,获取文件的MD5值,这个就跟人的身份证一样,是唯一的标识。
  • 创建一个fileReader对象,在读取完成时触发onload,将每一个切片通过spark.append增量计算md5;所有切片append完成后,spark.end()的返回值就是整个文件的MD5 hash值(即fileHash)
  • 通过一个上传切片接口携带fileHash,将所有切片上传。将所有已经上传成功的切片对应的切片下标值存入localstorage
  • 最后通过一个合并接口告诉服务端将所有的切片合并

前端整体代码

js
import axios from "axios";

const baseURL = 'http://localhost:3001';
export const controller = new AbortController();


// Upload a single chunk as multipart/form-data.
// The shared AbortController signal lets the caller cancel in-flight
// uploads; onUploadProgress reports per-chunk progress to axios.
export const uploadFile = (url, formData, onUploadProgress = () => { }) => {
  const config = {
    method: 'post',
    url,
    baseURL,
    signal: controller.signal,
    headers: { 'Content-Type': 'multipart/form-data' },
    data: formData,
    onUploadProgress,
  };
  return axios(config);
}

// Ask the server to assemble all previously uploaded chunks into one file.
export const mergeChunks = (url, data) =>
  axios({
    method: 'post',
    url,
    baseURL,
    headers: { 'Content-Type': 'application/json' },
    data,
  });
import axios from "axios";

const baseURL = 'http://localhost:3001';
export const controller = new AbortController();


// Upload a single chunk as multipart/form-data.
// The shared AbortController signal lets the caller cancel in-flight
// uploads; onUploadProgress reports per-chunk progress to axios.
export const uploadFile = (url, formData, onUploadProgress = () => { }) => {
  const config = {
    method: 'post',
    url,
    baseURL,
    signal: controller.signal,
    headers: { 'Content-Type': 'multipart/form-data' },
    data: formData,
    onUploadProgress,
  };
  return axios(config);
}

// Ask the server to assemble all previously uploaded chunks into one file.
export const mergeChunks = (url, data) =>
  axios({
    method: 'post',
    url,
    baseURL,
    headers: { 'Content-Type': 'application/json' },
    data,
  });
js
<script setup>
import { computed, ref } from "vue"
import * as SparkMD5 from "spark-md5"
import { uploadFile, mergeChunks, controller } from "./request"

// 默认分块大小
const DefualtChunkSize = 5 * 1024 * 1024;

// 当前处理文件
const currFile = ref({});
// 当前文件分块
const fileChunkList = ref([]);

// Handle <input type="file"> change: remember the file, reset the chunk
// list, compute the file hash while slicing, then start the (resumable)
// chunk upload.
const fileChange = async (event) => {
  const files = event.target.files;
  if (!files || !files.length) return;
  currFile.value = files[0];
  fileChunkList.value = [];
  const { fileHash } = await getFileChunk(files[0]);
  uploadChunks(fileHash);
}

// 用来测试已经中断上传的
let num = 0

// Upload every chunk that is not yet on the server, then ask the server
// to merge them. Indexes of successfully uploaded chunks are persisted
// in localStorage under the file hash, so an interrupted upload resumes
// by skipping those chunks next time.
const uploadChunks = (fileHash) => {
  // Read the resume bookkeeping once; it cannot change during the
  // synchronous map below.
  const stored = localStorage.getItem(fileHash);
  const uploadedArray = stored ? stored.split('-') : [];
  const requests = fileChunkList.value.map((item, index) => {
    if (uploadedArray.includes(`${index}`)) {
      // Already on the server: mark it complete so totalPercentage
      // does not compute NaN from an undefined percentage.
      item.percentage = 100;
      return;
    }
    const formData = new FormData();
    formData.append(`${currFile.value.name}-${fileHash}-${index}`, item.chunk);
    formData.append("filename", currFile.value.name);
    formData.append("hash", `${fileHash}-${index}`);
    formData.append("fileHash", fileHash);
    return uploadFile('/upload', formData, onUploadProgress(item)).then(() => {
      num += 1
      // Append this index to the persisted list of uploaded chunks.
      const uploadedIndex = localStorage.getItem(fileHash)
      localStorage.setItem(fileHash, uploadedIndex ? `${uploadedIndex}-${index}` : `${index}`)
      // if(num === 4){
      //   controller.abort()
      // }
    })
  });

  Promise.all(requests).then(() => {
    // All chunks are on the server: request the merge.
    mergeChunks('/mergeChunks', { size: DefualtChunkSize, filename: currFile.value.name }).then((res) => {
      if (res.data.data.code === 2000) {
        // Merge succeeded: the resume bookkeeping is no longer needed.
        localStorage.removeItem(fileHash)
      }
    });
  }).catch((err) => {
    // An aborted or failed chunk upload: keep localStorage intact so the
    // next attempt resumes from the chunks that did succeed.
    console.warn('chunk upload interrupted', err);
  });
}

// Slice `file` into chunks of `chunkSize` bytes, pushing each chunk onto
// fileChunkList, while incrementally feeding every chunk into SparkMD5.
// Resolves with { fileHash }: the MD5 of the whole file, used as its
// unique identity for resumable uploads. Rejects if reading fails
// (the original swallowed the error and left the promise pending).
const getFileChunk = (file, chunkSize = DefualtChunkSize) => {
  return new Promise((resolve, reject) => {
    const blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
    // Total number of chunks for this file.
    const chunks = Math.ceil(file.size / chunkSize);
    // Index of the chunk currently being read.
    let currentChunk = 0;
    // Incremental MD5 over ArrayBuffers: identical content => identical hash.
    const spark = new SparkMD5.ArrayBuffer();
    const fileReader = new FileReader();

    fileReader.onload = function (e) {
      spark.append(e.target.result);
      currentChunk++;

      if (currentChunk < chunks) {
        // More chunks remain: read the next slice.
        loadNext();
      } else {
        // Last chunk read: spark.end() yields the MD5 of the whole file.
        const fileHash = spark.end();
        console.info('finished computed hash', fileHash);
        resolve({ fileHash });
      }
    };

    fileReader.onerror = function (err) {
      // Propagate the failure instead of hanging the caller forever.
      console.warn('oops, something went wrong.');
      reject(err);
    };

    // Read the slice [start, end) of the file and queue it for upload.
    function loadNext() {
      const start = currentChunk * chunkSize;
      const end = Math.min(start + chunkSize, file.size);
      const chunk = blobSlice.call(file, start, end);
      fileChunkList.value.push({ chunk, size: chunk.size, name: currFile.value.name });
      fileReader.readAsArrayBuffer(chunk);
    }

    loadNext();
  });
}

// Overall progress: size-weighted mean of per-chunk percentages.
// Chunks that have not reported any progress yet count as 0 so the
// result is never NaN (item.percentage is undefined before the first
// progress event).
const totalPercentage = computed(() => {
  if (!fileChunkList.value.length) return 0;
  const loaded = fileChunkList.value
    .map(item => item.size * (item.percentage || 0))
    .reduce((curr, next) => curr + next, 0);
  return parseInt((loaded / currFile.value.size).toFixed(2));
})

// Per-chunk progress: returns an axios onUploadProgress handler that
// records this chunk's completion as an integer percentage on `item`.
const onUploadProgress = (item) => {
  return (e) => {
    const ratio = e.loaded / e.total;
    item.percentage = parseInt(String(ratio * 100));
  };
}

</script>

<template>
  <h1>大文件分片上传</h1>

  <input type="file" @change="fileChange" />

  <h2>总进度:{{ totalPercentage }} %</h2>

  <div class="percentage total">
    <p class="bg" :style="`width:${totalPercentage || 0}%`"></p>
  </div>

  <div class="progress" v-if="fileChunkList.length">
    <!-- :key keeps per-chunk DOM and progress state stable when the list re-renders -->
    <div class="progress-chunk" v-for="(item, index) in fileChunkList" :key="index">
      <div class="clonm flex-1">{{ item.name }}_{{ index }}</div>
      <div class="clonm size">{{ item.size }} kb</div>
      <div class="clonm flex-1">
        <div class="percentage">
          <p class="bg" :style="`width:${item.percentage || 0}%`"></p>
        </div>
        <span class="text">{{ item.percentage || 0 }}%</span>
      </div>
    </div>
  </div>
</template>

<style>
/* Global reset. */
* {
  margin: 0;
  padding: 0;
}
#app {
  font-family: Avenir, Helvetica, Arial, sans-serif;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
  text-align: center;
  color: #2c3e50;
}
h1,
h2 {
  margin: 20px;
  width: 90%;
}
/* Container of the overall progress bar. */
.total {
  width: 91%;
  margin: auto;
}
/* Bordered list of per-chunk progress rows. */
.progress {
  width: 90%;
  margin: 20px auto;
  border: 1px solid #0677e9;
  padding: 10px;
}
.progress-chunk {
  display: flex;
  padding: 10px 0;
  border-bottom: 1px solid #c5d1dd;
}
.clonm {
  display: flex;
  align-items: center;
  word-break: break-word;
  text-align: center;
}
.size {
  width: 200px;
}
.flex-1 {
  flex: 1;
}
/* Track of a progress bar. */
.percentage {
  flex: 1;
  background-color: #bdc1c5;
  border-radius: 3px;
  height: 6px;
  display: flex;
  align-items: center;
}
/* Filled portion; width is driven inline from the percentage binding. */
.bg {
  height: 100%;
  width: 0%;
  border-radius: 3px;
  background: rgb(22, 245, 2);
}
.text {
  width: 45px;
  padding: 0 5px;
}
</style>
<script setup>
import { computed, ref } from "vue"
import * as SparkMD5 from "spark-md5"
import { uploadFile, mergeChunks, controller } from "./request"

// 默认分块大小
const DefualtChunkSize = 5 * 1024 * 1024;

// 当前处理文件
const currFile = ref({});
// 当前文件分块
const fileChunkList = ref([]);

// Handle <input type="file"> change: remember the file, reset the chunk
// list, compute the file hash while slicing, then start the (resumable)
// chunk upload.
const fileChange = async (event) => {
  const files = event.target.files;
  if (!files || !files.length) return;
  currFile.value = files[0];
  fileChunkList.value = [];
  const { fileHash } = await getFileChunk(files[0]);
  uploadChunks(fileHash);
}

// 用来测试已经中断上传的
let num = 0

// Upload every chunk that is not yet on the server, then ask the server
// to merge them. Indexes of successfully uploaded chunks are persisted
// in localStorage under the file hash, so an interrupted upload resumes
// by skipping those chunks next time.
const uploadChunks = (fileHash) => {
  // Read the resume bookkeeping once; it cannot change during the
  // synchronous map below.
  const stored = localStorage.getItem(fileHash);
  const uploadedArray = stored ? stored.split('-') : [];
  const requests = fileChunkList.value.map((item, index) => {
    if (uploadedArray.includes(`${index}`)) {
      // Already on the server: mark it complete so totalPercentage
      // does not compute NaN from an undefined percentage.
      item.percentage = 100;
      return;
    }
    const formData = new FormData();
    formData.append(`${currFile.value.name}-${fileHash}-${index}`, item.chunk);
    formData.append("filename", currFile.value.name);
    formData.append("hash", `${fileHash}-${index}`);
    formData.append("fileHash", fileHash);
    return uploadFile('/upload', formData, onUploadProgress(item)).then(() => {
      num += 1
      // Append this index to the persisted list of uploaded chunks.
      const uploadedIndex = localStorage.getItem(fileHash)
      localStorage.setItem(fileHash, uploadedIndex ? `${uploadedIndex}-${index}` : `${index}`)
      // if(num === 4){
      //   controller.abort()
      // }
    })
  });

  Promise.all(requests).then(() => {
    // All chunks are on the server: request the merge.
    mergeChunks('/mergeChunks', { size: DefualtChunkSize, filename: currFile.value.name }).then((res) => {
      if (res.data.data.code === 2000) {
        // Merge succeeded: the resume bookkeeping is no longer needed.
        localStorage.removeItem(fileHash)
      }
    });
  }).catch((err) => {
    // An aborted or failed chunk upload: keep localStorage intact so the
    // next attempt resumes from the chunks that did succeed.
    console.warn('chunk upload interrupted', err);
  });
}

// Slice `file` into chunks of `chunkSize` bytes, pushing each chunk onto
// fileChunkList, while incrementally feeding every chunk into SparkMD5.
// Resolves with { fileHash }: the MD5 of the whole file, used as its
// unique identity for resumable uploads. Rejects if reading fails
// (the original swallowed the error and left the promise pending).
const getFileChunk = (file, chunkSize = DefualtChunkSize) => {
  return new Promise((resolve, reject) => {
    const blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
    // Total number of chunks for this file.
    const chunks = Math.ceil(file.size / chunkSize);
    // Index of the chunk currently being read.
    let currentChunk = 0;
    // Incremental MD5 over ArrayBuffers: identical content => identical hash.
    const spark = new SparkMD5.ArrayBuffer();
    const fileReader = new FileReader();

    fileReader.onload = function (e) {
      spark.append(e.target.result);
      currentChunk++;

      if (currentChunk < chunks) {
        // More chunks remain: read the next slice.
        loadNext();
      } else {
        // Last chunk read: spark.end() yields the MD5 of the whole file.
        const fileHash = spark.end();
        console.info('finished computed hash', fileHash);
        resolve({ fileHash });
      }
    };

    fileReader.onerror = function (err) {
      // Propagate the failure instead of hanging the caller forever.
      console.warn('oops, something went wrong.');
      reject(err);
    };

    // Read the slice [start, end) of the file and queue it for upload.
    function loadNext() {
      const start = currentChunk * chunkSize;
      const end = Math.min(start + chunkSize, file.size);
      const chunk = blobSlice.call(file, start, end);
      fileChunkList.value.push({ chunk, size: chunk.size, name: currFile.value.name });
      fileReader.readAsArrayBuffer(chunk);
    }

    loadNext();
  });
}

// Overall progress: size-weighted mean of per-chunk percentages.
// Chunks that have not reported any progress yet count as 0 so the
// result is never NaN (item.percentage is undefined before the first
// progress event).
const totalPercentage = computed(() => {
  if (!fileChunkList.value.length) return 0;
  const loaded = fileChunkList.value
    .map(item => item.size * (item.percentage || 0))
    .reduce((curr, next) => curr + next, 0);
  return parseInt((loaded / currFile.value.size).toFixed(2));
})

// Per-chunk progress: returns an axios onUploadProgress handler that
// records this chunk's completion as an integer percentage on `item`.
const onUploadProgress = (item) => {
  return (e) => {
    const ratio = e.loaded / e.total;
    item.percentage = parseInt(String(ratio * 100));
  };
}

</script>

<template>
  <h1>大文件分片上传</h1>

  <input type="file" @change="fileChange" />

  <h2>总进度:{{ totalPercentage }} %</h2>

  <div class="percentage total">
    <p class="bg" :style="`width:${totalPercentage || 0}%`"></p>
  </div>

  <div class="progress" v-if="fileChunkList.length">
    <!-- :key keeps per-chunk DOM and progress state stable when the list re-renders -->
    <div class="progress-chunk" v-for="(item, index) in fileChunkList" :key="index">
      <div class="clonm flex-1">{{ item.name }}_{{ index }}</div>
      <div class="clonm size">{{ item.size }} kb</div>
      <div class="clonm flex-1">
        <div class="percentage">
          <p class="bg" :style="`width:${item.percentage || 0}%`"></p>
        </div>
        <span class="text">{{ item.percentage || 0 }}%</span>
      </div>
    </div>
  </div>
</template>

<style>
/* Global reset. */
* {
  margin: 0;
  padding: 0;
}
#app {
  font-family: Avenir, Helvetica, Arial, sans-serif;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
  text-align: center;
  color: #2c3e50;
}
h1,
h2 {
  margin: 20px;
  width: 90%;
}
/* Container of the overall progress bar. */
.total {
  width: 91%;
  margin: auto;
}
/* Bordered list of per-chunk progress rows. */
.progress {
  width: 90%;
  margin: 20px auto;
  border: 1px solid #0677e9;
  padding: 10px;
}
.progress-chunk {
  display: flex;
  padding: 10px 0;
  border-bottom: 1px solid #c5d1dd;
}
.clonm {
  display: flex;
  align-items: center;
  word-break: break-word;
  text-align: center;
}
.size {
  width: 200px;
}
.flex-1 {
  flex: 1;
}
/* Track of a progress bar. */
.percentage {
  flex: 1;
  background-color: #bdc1c5;
  border-radius: 3px;
  height: 6px;
  display: flex;
  align-items: center;
}
/* Filled portion; width is driven inline from the percentage binding. */
.bg {
  height: 100%;
  width: 0%;
  border-radius: 3px;
  background: rgb(22, 245, 2);
}
.text {
  width: 45px;
  padding: 0 5px;
}
</style>

服务端整体代码

js
const Koa = require('koa');
const router = require('koa-router')();
const cors = require('koa2-cors');
const koaBody = require('koa-body');
const fs = require('fs');
const path = require('path');

const outputPath = path.resolve(__dirname, 'resources');
const app = new Koa();
let currChunk = {}; // Info about the chunk currently being received; returned on upload errors.

// CORS handling
app.use(cors({
  // Allow requests from any origin
  origin: (ctx) => {
    return '*' // allow all origins
  },
  maxAge: 5, // preflight response cache lifetime, in seconds
  credentials: true, // whether cookies may be sent
  allowMethods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'], // allowed HTTP methods
  allowHeaders: ['Content-Type', 'Authorization', 'Accept'], // request headers the server accepts
  exposeHeaders: ['WWW-Authenticate', 'Server-Authorization'] // response headers exposed to the client
}));

// Parse request bodies
app.use(koaBody({}));

// Recursively create `dirname` and any missing parents (like `mkdir -p`).
// Returns true once the directory exists.
function mkdir(dirname) {
  if (!fs.existsSync(dirname)) {
    // Native recursive creation replaces the hand-rolled parent recursion
    // and also tolerates a parent appearing between check and create.
    fs.mkdirSync(dirname, { recursive: true });
  }
  return true;
}

// Upload endpoint: receives one chunk per request as multipart/form-data.
router.post(
  '/upload',
  // Parse the multipart form-data payload
  koaBody({
    multipart: true,
    formidable: {
      uploadDir: outputPath,
      // Called when a file field starts arriving. The field name is
      // `${filename}-${fileHash}-${index}`; the original filename may
      // itself contain '-', so hash and index are taken from the end.
      onFileBegin: (name, file) => {
        let nameStr = name.split('-');
        let fileHash = nameStr[nameStr.length-2]
        let index = nameStr[nameStr.length-1]
        let filename = nameStr.slice(0,nameStr.length-2).join('-')
        filename = '_' + filename
        const dir = path.join(outputPath, filename);
        // Remember the current chunk so it can be reported on error.
        currChunk = {
          filename,
          fileHash,
          index
        };

        // Create the per-file chunk directory if it does not exist yet.
        if (!fs.existsSync(dir)) {
          mkdir(dir)
          // fs.mkdirSync(dir);
        }
        // Override the storage path so the chunk lands in the per-file
        // directory as `<fileHash>-<index>`.
        file.path = `${dir}/${fileHash}-${index}`;
      },
      onError: (error) => {
        // NOTE(review): `app` is the Koa application object, which has no
        // status/body — this looks like it was meant to use the request
        // ctx, which is not available in this callback. Verify that
        // upload errors actually produce a 400 response.
        app.status = 400;
        app.body = {
          code: 400,
          msg: "上传失败",
          data: currChunk
        };
        return;
      },
    },
  }),
  // Respond with success once the chunk has been stored.
  async (ctx) => {
    ctx.set("Content-Type", "application/json");
    ctx.body = JSON.stringify({
      code: 2000,
      message: 'upload successfully!'
    });

  });

// Merge endpoint: assembles all stored chunks of `filename` into the
// final file. `size` is the chunk size used when slicing on the client.
router.post('/mergeChunks', async (ctx) => {
  const {
    filename,
    size
  } = ctx.request.body;
  // Output file is resources/<filename>; chunks live in resources/_<filename>.
  // await mergeFileChunk(path.join(outputPath, '_' + filename), filename, size);
  await mergeFileChunk(path.join(outputPath, filename), '_'+filename, size);

  // Build the JSON response
  ctx.set("Content-Type", "application/json");
  ctx.body = JSON.stringify({
    data: {
      code: 2000,
      filename,
      size
    },
    message: 'merge chunks successful!'
  });
});

// Stream one chunk file into `writeStream`, deleting the chunk file once
// the data has been fully flushed to the destination. Rejects on read or
// write errors instead of leaving the promise pending forever.
const pipeStream = (path, writeStream) => {
  return new Promise((resolve, reject) => {
    const readStream = fs.createReadStream(path);
    readStream.on("error", reject);
    writeStream.on("error", reject);
    // 'finish' (not the source's 'end') guarantees the data is written
    // before we delete the chunk and report completion.
    writeStream.on("finish", () => {
      fs.unlinkSync(path);
      resolve();
    });
    readStream.pipe(writeStream);
  });
}

// Merge all chunk files for one upload into `filePath`.
// NOTE(review): despite its name, `filename` here is the chunk DIRECTORY
// name ('_' + original filename) — see the caller in /mergeChunks.
// `size` is the client-side chunk size, used to compute each chunk's
// byte offset in the output file.
const mergeFileChunk = async (filePath, filename, size) => {
  const chunkDir = path.join(outputPath, filename);
  console.log('chunkDir', chunkDir)
  const chunkPaths = fs.readdirSync(chunkDir);

  if (!chunkPaths.length) return;

  // Sort by chunk index; directory listing order is not guaranteed.
  // Chunk files are named `${fileHash}-${index}` (md5 hex has no '-'),
  // so split('-')[1] is the numeric index.
  chunkPaths.sort((a, b) => a.split("-")[1] - b.split("-")[1]);
  console.log("chunkPaths = ", chunkPaths);
  console.log("filePath = ", filePath);

  // Write all chunks in parallel, each into its own byte range.
  // NOTE(review): each createWriteStream uses the default 'w' flag, which
  // truncates the file on open — parallel opens may race. Confirm whether
  // explicit flags should be passed here.
  await Promise.all(
    chunkPaths.map((chunkPath, index) =>
      pipeStream(
        path.resolve(chunkDir, chunkPath),
        // Writable stream positioned at this chunk's offset
        fs.createWriteStream(filePath, {
          start: index * size,
          end: (index + 1) * size
        })
      )
    )
  );

  // Remove the chunk directory after a successful merge.
  deleteFolderRecursive(chunkDir)
  // fs.rmdirSync(chunkDir);
};

// Recursively delete the directory at `url` and everything inside it.
// Logs and returns silently if the path does not exist.
function deleteFolderRecursive(url) {
  if (!fs.existsSync(url)) {
    console.log("路径不存在");
    return;
  }
  // Delete every entry; recurse into subdirectories.
  fs.readdirSync(url).forEach(function (file) {
    const curPath = path.join(url, file);
    if (fs.statSync(curPath).isDirectory()) {
      // BUG FIX: was `Book.deleteFolderRecursive(curPath)` — `Book` is
      // undefined in this file, so any nested directory crashed the merge.
      deleteFolderRecursive(curPath);
    } else {
      fs.unlinkSync(curPath);
    }
  });
  // The directory is empty now; remove it.
  fs.rmdirSync(url);
}

// 注册路由
app.use(router.routes(), router.allowedMethods())

// 启动服务,监听端口
app.listen(3001, (error) => {
  if (!error) {
    console.log('server is runing at port 3001...');
  }
});
const Koa = require('koa');
const router = require('koa-router')();
const cors = require('koa2-cors');
const koaBody = require('koa-body');
const fs = require('fs');
const path = require('path');

const outputPath = path.resolve(__dirname, 'resources');
const app = new Koa();
let currChunk = {}; // Info about the chunk currently being received; returned on upload errors.

// CORS handling
app.use(cors({
  // Allow requests from any origin
  origin: (ctx) => {
    return '*' // allow all origins
  },
  maxAge: 5, // preflight response cache lifetime, in seconds
  credentials: true, // whether cookies may be sent
  allowMethods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'], // allowed HTTP methods
  allowHeaders: ['Content-Type', 'Authorization', 'Accept'], // request headers the server accepts
  exposeHeaders: ['WWW-Authenticate', 'Server-Authorization'] // response headers exposed to the client
}));

// Parse request bodies
app.use(koaBody({}));

// Recursively create `dirname` and any missing parents (like `mkdir -p`).
// Returns true once the directory exists.
function mkdir(dirname) {
  if (!fs.existsSync(dirname)) {
    // Native recursive creation replaces the hand-rolled parent recursion
    // and also tolerates a parent appearing between check and create.
    fs.mkdirSync(dirname, { recursive: true });
  }
  return true;
}

// Upload endpoint: receives one chunk per request as multipart/form-data.
router.post(
  '/upload',
  // Parse the multipart form-data payload
  koaBody({
    multipart: true,
    formidable: {
      uploadDir: outputPath,
      // Called when a file field starts arriving. The field name is
      // `${filename}-${fileHash}-${index}`; the original filename may
      // itself contain '-', so hash and index are taken from the end.
      onFileBegin: (name, file) => {
        let nameStr = name.split('-');
        let fileHash = nameStr[nameStr.length-2]
        let index = nameStr[nameStr.length-1]
        let filename = nameStr.slice(0,nameStr.length-2).join('-')
        filename = '_' + filename
        const dir = path.join(outputPath, filename);
        // Remember the current chunk so it can be reported on error.
        currChunk = {
          filename,
          fileHash,
          index
        };

        // Create the per-file chunk directory if it does not exist yet.
        if (!fs.existsSync(dir)) {
          mkdir(dir)
          // fs.mkdirSync(dir);
        }
        // Override the storage path so the chunk lands in the per-file
        // directory as `<fileHash>-<index>`.
        file.path = `${dir}/${fileHash}-${index}`;
      },
      onError: (error) => {
        // NOTE(review): `app` is the Koa application object, which has no
        // status/body — this looks like it was meant to use the request
        // ctx, which is not available in this callback. Verify that
        // upload errors actually produce a 400 response.
        app.status = 400;
        app.body = {
          code: 400,
          msg: "上传失败",
          data: currChunk
        };
        return;
      },
    },
  }),
  // Respond with success once the chunk has been stored.
  async (ctx) => {
    ctx.set("Content-Type", "application/json");
    ctx.body = JSON.stringify({
      code: 2000,
      message: 'upload successfully!'
    });

  });

// Merge endpoint: assembles all stored chunks of `filename` into the
// final file. `size` is the chunk size used when slicing on the client.
router.post('/mergeChunks', async (ctx) => {
  const {
    filename,
    size
  } = ctx.request.body;
  // Output file is resources/<filename>; chunks live in resources/_<filename>.
  // await mergeFileChunk(path.join(outputPath, '_' + filename), filename, size);
  await mergeFileChunk(path.join(outputPath, filename), '_'+filename, size);

  // Build the JSON response
  ctx.set("Content-Type", "application/json");
  ctx.body = JSON.stringify({
    data: {
      code: 2000,
      filename,
      size
    },
    message: 'merge chunks successful!'
  });
});

// Stream one chunk file into `writeStream`, deleting the chunk file once
// the data has been fully flushed to the destination. Rejects on read or
// write errors instead of leaving the promise pending forever.
const pipeStream = (path, writeStream) => {
  return new Promise((resolve, reject) => {
    const readStream = fs.createReadStream(path);
    readStream.on("error", reject);
    writeStream.on("error", reject);
    // 'finish' (not the source's 'end') guarantees the data is written
    // before we delete the chunk and report completion.
    writeStream.on("finish", () => {
      fs.unlinkSync(path);
      resolve();
    });
    readStream.pipe(writeStream);
  });
}

// Merge all chunk files for one upload into `filePath`.
// NOTE(review): despite its name, `filename` here is the chunk DIRECTORY
// name ('_' + original filename) — see the caller in /mergeChunks.
// `size` is the client-side chunk size, used to compute each chunk's
// byte offset in the output file.
const mergeFileChunk = async (filePath, filename, size) => {
  const chunkDir = path.join(outputPath, filename);
  console.log('chunkDir', chunkDir)
  const chunkPaths = fs.readdirSync(chunkDir);

  if (!chunkPaths.length) return;

  // Sort by chunk index; directory listing order is not guaranteed.
  // Chunk files are named `${fileHash}-${index}` (md5 hex has no '-'),
  // so split('-')[1] is the numeric index.
  chunkPaths.sort((a, b) => a.split("-")[1] - b.split("-")[1]);
  console.log("chunkPaths = ", chunkPaths);
  console.log("filePath = ", filePath);

  // Write all chunks in parallel, each into its own byte range.
  // NOTE(review): each createWriteStream uses the default 'w' flag, which
  // truncates the file on open — parallel opens may race. Confirm whether
  // explicit flags should be passed here.
  await Promise.all(
    chunkPaths.map((chunkPath, index) =>
      pipeStream(
        path.resolve(chunkDir, chunkPath),
        // Writable stream positioned at this chunk's offset
        fs.createWriteStream(filePath, {
          start: index * size,
          end: (index + 1) * size
        })
      )
    )
  );

  // Remove the chunk directory after a successful merge.
  deleteFolderRecursive(chunkDir)
  // fs.rmdirSync(chunkDir);
};

// Recursively delete the directory at `url` and everything inside it.
// Logs and returns silently if the path does not exist.
function deleteFolderRecursive(url) {
  if (!fs.existsSync(url)) {
    console.log("路径不存在");
    return;
  }
  // Delete every entry; recurse into subdirectories.
  fs.readdirSync(url).forEach(function (file) {
    const curPath = path.join(url, file);
    if (fs.statSync(curPath).isDirectory()) {
      // BUG FIX: was `Book.deleteFolderRecursive(curPath)` — `Book` is
      // undefined in this file, so any nested directory crashed the merge.
      deleteFolderRecursive(curPath);
    } else {
      fs.unlinkSync(curPath);
    }
  });
  // The directory is empty now; remove it.
  fs.rmdirSync(url);
}

// 注册路由
app.use(router.routes(), router.allowedMethods())

// 启动服务,监听端口
app.listen(3001, (error) => {
  if (!error) {
    console.log('server is runing at port 3001...');
  }
});