lz4opt.h
Go to the documentation of this file.
1 /*
2  lz4opt.h - Optimal Mode of LZ4
3  Copyright (C) 2015-2016, Przemyslaw Skibinski <inikep@gmail.com>
4  Note : this file is intended to be included within lz4hc.c
5 
6  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions are
10  met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above
15  copyright notice, this list of conditions and the following disclaimer
16  in the documentation and/or other materials provided with the
17  distribution.
18 
19  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31  You can contact the author at :
32  - LZ4 source repository : https://github.com/lz4/lz4
33  - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
34 */
35 
36 #define LZ4_OPT_NUM (1<<12)
37 
38 
39 typedef struct
40 {
41  int off;
42  int len;
44 
45 typedef struct
46 {
47  int price;
48  int off;
49  int mlen;
50  int litlen;
52 
53 
54 /* price in bits */
55 FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
56 {
57  size_t price = 8*litlen;
58  if (litlen >= (size_t)RUN_MASK) price+=8*(1+(litlen-RUN_MASK)/255);
59  return price;
60 }
61 
62 
63 /* requires mlen >= MINMATCH */
64 FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
65 {
66  size_t price = 16 + 8; /* 16-bit offset + token */
67 
68  price += LZ4HC_literalsPrice(litlen);
69 
70  mlen -= MINMATCH;
71  if (mlen >= (size_t)ML_MASK) price+=8*(1+(mlen-ML_MASK)/255);
72 
73  return price;
74 }
75 
76 
77 /*-*************************************
78 * Binary Tree search
79 ***************************************/
82  const BYTE* const ip,
83  const BYTE* const iHighLimit,
84  size_t best_mlen,
86  int* matchNum)
87 {
88  U16* const chainTable = ctx->chainTable;
89  U32* const HashTable = ctx->hashTable;
90  const BYTE* const base = ctx->base;
91  const U32 dictLimit = ctx->dictLimit;
92  const U32 current = (U32)(ip - base);
93  const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
94  const BYTE* const dictBase = ctx->dictBase;
95  const BYTE* match;
96  int nbAttempts = ctx->searchNum;
97  int mnum = 0;
98  U16 *ptr0, *ptr1, delta0, delta1;
99  U32 matchIndex;
100  size_t matchLength = 0;
101  U32* HashPos;
102 
103  if (ip + MINMATCH > iHighLimit) return 1;
104 
105  /* HC4 match finder */
106  HashPos = &HashTable[LZ4HC_hashPtr(ip)];
107  matchIndex = *HashPos;
108  *HashPos = current;
109 
110  ptr0 = &DELTANEXTMAXD(current*2+1);
111  ptr1 = &DELTANEXTMAXD(current*2);
112  delta0 = delta1 = (U16)(current - matchIndex);
113 
114  while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
115  nbAttempts--;
116  if (matchIndex >= dictLimit) {
117  match = base + matchIndex;
118  matchLength = LZ4_count(ip, match, iHighLimit);
119  } else {
120  const BYTE* vLimit = ip + (dictLimit - matchIndex);
121  match = dictBase + matchIndex;
122  if (vLimit > iHighLimit) vLimit = iHighLimit;
123  matchLength = LZ4_count(ip, match, vLimit);
124  if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
125  matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
126  }
127 
128  if (matchLength > best_mlen) {
129  best_mlen = matchLength;
130  if (matches) {
131  if (matchIndex >= dictLimit)
132  matches[mnum].off = (int)(ip - match);
133  else
134  matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
135  matches[mnum].len = (int)matchLength;
136  mnum++;
137  }
138  if (best_mlen > LZ4_OPT_NUM) break;
139  }
140 
141  if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */
142  break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
143 
144  if (*(ip+matchLength) < *(match+matchLength)) {
145  *ptr0 = delta0;
146  ptr0 = &DELTANEXTMAXD(matchIndex*2);
147  if (*ptr0 == (U16)-1) break;
148  delta0 = *ptr0;
149  delta1 += delta0;
150  matchIndex -= delta0;
151  } else {
152  *ptr1 = delta1;
153  ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
154  if (*ptr1 == (U16)-1) break;
155  delta1 = *ptr1;
156  delta0 += delta1;
157  matchIndex -= delta1;
158  }
159  }
160 
161  *ptr0 = (U16)-1;
162  *ptr1 = (U16)-1;
163  if (matchNum) *matchNum = mnum;
164  /* if (best_mlen > 8) return best_mlen-8; */
165  if (!matchNum) return 1;
166  return 1;
167 }
168 
169 
170 FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
171 {
172  const BYTE* const base = ctx->base;
173  const U32 target = (U32)(ip - base);
174  U32 idx = ctx->nextToUpdate;
175  while(idx < target)
176  idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL);
177 }
178 
179 
180 /** Tree updater, providing best match */
182  LZ4HC_CCtx_internal* ctx,
183  const BYTE* const ip, const BYTE* const iHighLimit,
184  size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
185 {
186  int mnum = 0;
187  if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */
188  if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
189  best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum);
190  ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen);
191  return mnum;
192 }
193 
194 
195 #define SET_PRICE(pos, mlen, offset, litlen, price) \
196 { \
197  while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \
198  opt[pos].mlen = (int)mlen; \
199  opt[pos].off = (int)offset; \
200  opt[pos].litlen = (int)litlen; \
201  opt[pos].price = (int)price; \
202 }
203 
204 
206  LZ4HC_CCtx_internal* ctx,
207  const char* const source,
208  char* dest,
209  int inputSize,
210  int maxOutputSize,
212  const size_t sufficient_len,
213  const int fullUpdate
214  )
215 {
218  const BYTE *inr = NULL;
219  size_t res, cur, cur2;
220  size_t i, llen, litlen, mlen, best_mlen, price, offset, best_off, match_num, last_pos;
221 
222  const BYTE* ip = (const BYTE*) source;
223  const BYTE* anchor = ip;
224  const BYTE* const iend = ip + inputSize;
225  const BYTE* const mflimit = iend - MFLIMIT;
226  const BYTE* const matchlimit = (iend - LASTLITERALS);
227  BYTE* op = (BYTE*) dest;
228  BYTE* const oend = op + maxOutputSize;
229 
230  /* init */
231  ctx->end += inputSize;
232  ip++;
233 
234  /* Main Loop */
235  while (ip < mflimit) {
236  memset(opt, 0, sizeof(LZ4HC_optimal_t));
237  last_pos = 0;
238  llen = ip - anchor;
239  match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
240  if (!match_num) { ip++; continue; }
241 
242  if ((size_t)matches[match_num-1].len > sufficient_len) {
243  best_mlen = matches[match_num-1].len;
244  best_off = matches[match_num-1].off;
245  cur = 0;
246  last_pos = 1;
247  goto encode;
248  }
249 
250  /* set prices using matches at position = 0 */
251  for (i = 0; i < match_num; i++) {
252  mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH;
253  best_mlen = (matches[i].len < LZ4_OPT_NUM) ? matches[i].len : LZ4_OPT_NUM;
254  while (mlen <= best_mlen) {
255  litlen = 0;
256  price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen);
257  SET_PRICE(mlen, mlen, matches[i].off, litlen, price);
258  mlen++;
259  }
260  }
261 
262  if (last_pos < MINMATCH) { ip++; continue; }
263 
264  /* check further positions */
265  opt[0].mlen = opt[1].mlen = 1;
266  for (cur = 1; cur <= last_pos; cur++) {
267  inr = ip + cur;
268 
269  if (opt[cur-1].mlen == 1) {
270  litlen = opt[cur-1].litlen + 1;
271  if (cur != litlen) {
272  price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
273  } else {
274  price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
275  }
276  } else {
277  litlen = 1;
278  price = opt[cur - 1].price + LZ4HC_literalsPrice(litlen);
279  }
280 
281  mlen = 1;
282  best_mlen = 0;
283  if (cur > last_pos || price < (size_t)opt[cur].price)
284  SET_PRICE(cur, mlen, best_mlen, litlen, price);
285 
286  if (cur == last_pos || inr >= mflimit) break;
287 
288  match_num = LZ4HC_BinTree_GetAllMatches(ctx, inr, matchlimit, MINMATCH-1, matches, fullUpdate);
289  if (match_num > 0 && (size_t)matches[match_num-1].len > sufficient_len) {
290  best_mlen = matches[match_num-1].len;
291  best_off = matches[match_num-1].off;
292  last_pos = cur + 1;
293  goto encode;
294  }
295 
296  /* set prices using matches at position = cur */
297  for (i = 0; i < match_num; i++) {
298  mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH;
299  cur2 = cur;
300  best_mlen = (cur2 + matches[i].len < LZ4_OPT_NUM) ? (size_t)matches[i].len : LZ4_OPT_NUM - cur2;
301 
302  while (mlen <= best_mlen) {
303  if (opt[cur2].mlen == 1) {
304  litlen = opt[cur2].litlen;
305 
306  if (cur2 != litlen)
307  price = opt[cur2 - litlen].price + LZ4HC_sequencePrice(litlen, mlen);
308  else
309  price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen);
310  } else {
311  litlen = 0;
312  price = opt[cur2].price + LZ4HC_sequencePrice(litlen, mlen);
313  }
314 
315  if (cur2 + mlen > last_pos || price < (size_t)opt[cur2 + mlen].price) { // || (((int)price == opt[cur2 + mlen].price) && (opt[cur2 + mlen-1].mlen == 1))) {
316  SET_PRICE(cur2 + mlen, mlen, matches[i].off, litlen, price);
317  }
318  mlen++;
319  }
320  }
321  } /* for (cur = 1; cur <= last_pos; cur++) */
322 
323  best_mlen = opt[last_pos].mlen;
324  best_off = opt[last_pos].off;
325  cur = last_pos - best_mlen;
326 
327 encode: /* cur, last_pos, best_mlen, best_off have to be set */
328  opt[0].mlen = 1;
329  while (1) {
330  mlen = opt[cur].mlen;
331  offset = opt[cur].off;
332  opt[cur].mlen = (int)best_mlen;
333  opt[cur].off = (int)best_off;
334  best_mlen = mlen;
335  best_off = offset;
336  if (mlen > cur) break;
337  cur -= mlen;
338  }
339 
340  cur = 0;
341  while (cur < last_pos) {
342  mlen = opt[cur].mlen;
343  if (mlen == 1) { ip++; cur++; continue; }
344  offset = opt[cur].off;
345  cur += mlen;
346 
347  res = LZ4HC_encodeSequence(&ip, &op, &anchor, (int)mlen, ip - offset, limit, oend);
348  if (res) return 0;
349  }
350  }
351 
352  /* Encode Last Literals */
353  { int lastRun = (int)(iend - anchor);
354  if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
355  if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
356  else *op++ = (BYTE)(lastRun<<ML_BITS);
357  memcpy(op, anchor, iend - anchor);
358  op += iend-anchor;
359  }
360 
361  /* End */
362  return (int) ((char*)op-dest);
363 }
const XML_Char int len
Definition: expat.h:262
char int int maxOutputSize
Definition: lz4.h:435
#define ML_BITS
Definition: lz4.cxx:277
#define DELTANEXTMAXD(p)
Definition: lz4hc.cxx:87
const XML_Char * target
Definition: expat.h:268
#define MINMATCH
Definition: lz4.cxx:263
#define SET_PRICE(pos, mlen, offset, litlen, price)
Definition: lz4opt.h:195
unsigned int dictLimit
Definition: lz4hc.h:171
ps_atom_t encode(std::string const &)
unsigned int U32
Definition: lz4.cxx:148
FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
Definition: lz4opt.h:55
limitedOutput_directive
Definition: lz4.cxx:380
static unsigned LZ4_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit)
Definition: lz4.cxx:351
static int LZ4HC_compress_optimal(LZ4HC_CCtx_internal *ctx, const char *const source, char *dest, int inputSize, int maxOutputSize, limitedOutput_directive limit, const size_t sufficient_len, const int fullUpdate)
Definition: lz4opt.h:205
#define LZ4_OPT_NUM
Definition: lz4opt.h:36
TString ip
Definition: loadincs.C:5
#define LASTLITERALS
Definition: lz4.cxx:266
const unsigned char * dictBase
Definition: lz4hc.h:169
const XML_Char int const XML_Char int const XML_Char * base
Definition: expat.h:331
#define MAX_DISTANCE
Definition: lz4.cxx:275
static U32 LZ4HC_hashPtr(const void *ptr)
Definition: lz4hc.cxx:90
unsigned int hashTable[LZ4HC_HASHTABLESIZE]
Definition: lz4hc.h:165
unsigned int lowLimit
Definition: lz4hc.h:172
FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal *ctx, const BYTE *const ip, const BYTE *const iHighLimit)
Definition: lz4opt.h:170
#define RUN_MASK
Definition: lz4.cxx:280
FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
Definition: lz4opt.h:64
FORCE_INLINE int LZ4HC_encodeSequence(const BYTE **ip, BYTE **op, const BYTE **anchor, int matchLength, const BYTE *const match, limitedOutput_directive limitedOutputBuffer, BYTE *oend)
Definition: lz4hc.cxx:258
unsigned int nextToUpdate
Definition: lz4hc.h:173
unsigned char BYTE
Definition: lz4.cxx:146
#define FORCE_INLINE
Definition: lz4.cxx:110
#define ML_MASK
Definition: lz4.cxx:278
const char * source
Definition: lz4.h:436
unsigned short U16
Definition: lz4.cxx:147
unsigned int searchNum
Definition: lz4hc.h:174
const unsigned char * base
Definition: lz4hc.h:168
const char char int inputSize
Definition: lz4.h:436
#define MFLIMIT
Definition: lz4.cxx:267
FORCE_INLINE int LZ4HC_BinTree_GetAllMatches(LZ4HC_CCtx_internal *ctx, const BYTE *const ip, const BYTE *const iHighLimit, size_t best_mlen, LZ4HC_match_t *matches, const int fullUpdate)
Definition: lz4opt.h:181
unsigned short chainTable[LZ4HC_MAXD]
Definition: lz4hc.h:166
FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches(LZ4HC_CCtx_internal *ctx, const BYTE *const ip, const BYTE *const iHighLimit, size_t best_mlen, LZ4HC_match_t *matches, int *matchNum)
Definition: lz4opt.h:80
const unsigned char * end
Definition: lz4hc.h:167