#include "LCMultiReader.h"
#include "LCSegmentMergeQueue.h"
#include "LCSegmentMergeInfo.h"
#include "LCSegmentReader.h"
#include "GNUstep.h"
/** An IndexReader which reads multiple indexes, appending their content.
*
* @version $Id: LCMultiReader.m 544 2006-02-18 07:29:53Z yjchen $
*/
/* Private helpers of LCMultiReader; both are implemented in this file. */
@interface LCMultiReader (LCPrivate)
/* Stores the sub-readers and builds the table of starting document
 * numbers, accumulating the total document count and deletion flag. */
- (void) initialize: (NSArray *) subReaders;
/* Returns the index of the sub-reader responsible for the combined
 * (global) document number n. */
- (int) readerIndex: (int) n;
@end
@implementation LCMultiReader

/**
 * Base initializer: creates the norms cache and resets the document
 * counters. The reader only becomes usable once -initialize: has been
 * handed the sub-readers, which the initWith... variants below take care of.
 */
- (id) init
{
  self = [super init];
  if (self != nil)
    {
      /* NOTE(review): the initWith... variants call -init and then a
       * superclass initializer. If that initializer re-enters -init,
       * this guard keeps us from leaking a second dictionary. */
      if (normsCache == nil)
        normsCache = [[NSMutableDictionary alloc] init];
      maxDoc = 0;
      numDocs = -1;          // -1 marks the cached count as "not computed"
      hasDeletions = NO;
    }
  return self;
}

/**
 * Construct a MultiReader aggregating the given set of (sub)readers.
 * The directory handed to the superclass is the one of the first
 * sub-reader, or nil when the array is empty.
 * All sub-readers are closed when this reader is closed (see -doClose).
 * @param r array of (sub)readers
 */
- (id) initWithReaders: (NSArray *) r
{
  self = [self init];
  if (self == nil)
    return nil;

  id dir = ([r count] == 0) ? nil : [(LCIndexReader *)[r objectAtIndex: 0] directory];
  /* Keep whatever the superclass returns instead of discarding it. */
  self = [super initWithDirectory: dir];
  if (self != nil)
    [self initialize: r];
  return self;
}

/** Construct reading the named set of readers. */
- (id) initWithDirectory: (id) dir
            segmentInfos: (LCSegmentInfos *) sis
                   close: (BOOL) close
                 readers: (NSArray *) sr
{
  self = [self init];
  if (self == nil)
    return nil;

  self = [super initWithDirectory: dir
                     segmentInfos: sis
                   closeDirectory: close];
  if (self != nil)
    [self initialize: sr];
  return self;
}

/** Releases everything this reader retained or allocated. */
- (void) dealloc
{
  DESTROY(normsCache);
  DESTROY(subReaders);
  DESTROY(starts);
  DESTROY(ones);
  [super dealloc];
}

/**
 * Retains the sub-readers and builds the starts array: entry i is the
 * combined document number of the first document of sub-reader i, and one
 * extra trailing entry holds the total (maxDoc).
 */
- (void) initialize: (NSArray *) sr
{
  ASSIGN(subReaders, sr);
  starts = [[NSMutableArray alloc] init];

  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    {
      id reader = [subReaders objectAtIndex: i];
      [starts addObject: [NSNumber numberWithInt: maxDoc]];
      maxDoc += [reader maximalDocument];      // running total of documents
      if ([reader hasDeletions])
        hasDeletions = YES;
    }
  [starts addObject: [NSNumber numberWithInt: maxDoc]];
}

/** Return an array of term frequency vectors for the specified document.
 * The array contains a vector for each vectorized field in the document.
 * Each vector contains term numbers and frequencies for all terms
 * in a given vectorized field.
 * The request is forwarded to the sub-reader holding document n, so the
 * result (including nil) is whatever that sub-reader returns.
 */
- (NSArray *) termFrequencyVectors: (int) n
{
  int i = [self readerIndex: n];                          // owning segment
  int local = n - [[starts objectAtIndex: i] intValue];   // doc number inside it
  return [[subReaders objectAtIndex: i] termFrequencyVectors: local];
}

/** Same as -termFrequencyVectors: but restricted to a single field. */
- (id) termFrequencyVector: (int) n field: (NSString *) field
{
  int i = [self readerIndex: n];
  int local = n - [[starts objectAtIndex: i] intValue];
  return [[subReaders objectAtIndex: i] termFrequencyVector: local
                                                      field: field];
}

/**
 * Sum of -numberOfDocuments over all sub-readers. The value is cached in
 * numDocs and recomputed after the cache has been invalidated (set to -1).
 */
- (int) numberOfDocuments
{
  if (numDocs == -1)
    {
      int sum = 0;
      int count = [subReaders count];
      int i;
      for (i = 0; i < count; i++)
        sum += [[subReaders objectAtIndex: i] numberOfDocuments];
      numDocs = sum;
    }
  return numDocs;
}

/** Total of the sub-readers' -maximalDocument values, computed in -initialize:. */
- (int) maximalDocument
{
  return maxDoc;
}

/** Returns document n by forwarding to the sub-reader that holds it. */
- (LCDocument *) document: (int) n
{
  int i = [self readerIndex: n];
  int local = n - [[starts objectAtIndex: i] intValue];
  return [[subReaders objectAtIndex: i] document: local];
}

/** Asks the owning sub-reader whether document n is deleted. */
- (BOOL) isDeleted: (int) n
{
  int i = [self readerIndex: n];
  int local = n - [[starts objectAtIndex: i] intValue];
  return [[subReaders objectAtIndex: i] isDeleted: local];
}

/** YES once any sub-reader reported deletions or -doDelete: was used. */
- (BOOL) hasDeletions
{
  return hasDeletions;
}

/** Deletes document n in its sub-reader and invalidates the cached count. */
- (void) doDelete: (int) n
{
  numDocs = -1;                                           // invalidate cache
  int i = [self readerIndex: n];
  int local = n - [[starts objectAtIndex: i] intValue];
  [(LCIndexReader *)[subReaders objectAtIndex: i] deleteDocument: local];
  hasDeletions = YES;
}

/** Undeletes everything in every sub-reader and resets the local state. */
- (void) doUndeleteAll
{
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    [[subReaders objectAtIndex: i] undeleteAll];
  hasDeletions = NO;
  numDocs = -1;                                           // invalidate cache
}

/**
 * Binary search over the starts array for the sub-reader containing the
 * combined document number n. When several consecutive entries share the
 * same start value, the last of them is returned. With no exact match the
 * result is the index of the last start value below n.
 */
- (int) readerIndex: (int) n
{
  int count = [subReaders count];
  int lo = 0;
  int hi = count - 1;

  while (hi >= lo)
    {
      int mid = (lo + hi) >> 1;
      int midValue = [[starts objectAtIndex: mid] intValue];

      if (n < midValue)
        {
          hi = mid - 1;
        }
      else if (n > midValue)
        {
          lo = mid + 1;
        }
      else
        {
          /* Exact hit: advance over following entries with the same start. */
          while (mid + 1 < count
                 && [[starts objectAtIndex: (mid + 1)] intValue] == midValue)
            {
              mid++;
            }
          return mid;
        }
    }
  return hi;
}

/** YES when at least one sub-reader has norms for the field. */
- (BOOL) hasNorms: (NSString *) field
{
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    {
      if ([[subReaders objectAtIndex: i] hasNorms: field])
        return YES;
    }
  return NO;
}

/**
 * Lazily created placeholder norms covering -maximalDocument documents,
 * obtained from +[LCSegmentReader createFakeNorms:] and kept in `ones`.
 */
- (NSData *) fakeNorms
{
  if (ones == nil)
    ASSIGN(ones, [LCSegmentReader createFakeNorms: [self maximalDocument]]);
  return ones;
}

/**
 * Returns the norms of the field for all documents of all sub-readers.
 * Results are cached per field; fields without norms in any sub-reader
 * get the shared fake norms (which are not put into the cache).
 */
- (NSData *) norms: (NSString *) field
{
  NSMutableData *bytes = [normsCache objectForKey: field];
  if (bytes != nil)
    return bytes;                                         // cache hit
  if (![self hasNorms: field])
    return [self fakeNorms];

  bytes = [[NSMutableData alloc] init];
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    {
      /* Each sub-reader writes its part at its own starting offset. */
      [[subReaders objectAtIndex: i] setNorms: field
                                        bytes: bytes
                                       offset: [[starts objectAtIndex: i] intValue]];
    }
  [normsCache setObject: bytes forKey: field];            // update cache
  return AUTORELEASE(bytes);
}

/**
 * Writes the norms of the field into `result`, starting at `offset`.
 * Cached (or fake) norms are copied first when available.
 */
- (void) setNorms: (NSString *) field
            bytes: (NSMutableData *) result offset: (int) offset
{
  NSData *bytes = [normsCache objectForKey: field];
  if ((bytes == nil) && (![self hasNorms: field]))
    bytes = [self fakeNorms];
  if (bytes != nil)                                       // cache hit
    {
      NSRange r = NSMakeRange(offset, [self maximalDocument]);
      [result replaceBytesInRange: r withBytes: [bytes bytes]];
    }

  /* NOTE(review): the per-reader pass below also runs after a cache hit,
   * which looks redundant -- confirm before turning it into an early
   * return, since it is kept here exactly as it was. */
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)                             // read from segments
    {
      [[subReaders objectAtIndex: i] setNorms: field
                                        bytes: result
                                       offset: offset + [[starts objectAtIndex: i] intValue]];
    }
}

/** Drops the cached norms of the field and forwards the change to the owning sub-reader. */
- (void) doSetNorm: (int) n field: (NSString *) field charValue: (char) value
{
  [normsCache removeObjectForKey: field];                 // clear cache
  int i = [self readerIndex: n];
  int local = n - [[starts objectAtIndex: i] intValue];
  [[subReaders objectAtIndex: i] setNorm: local field: field charValue: value];
}

/** Enumerator over the merged terms of all sub-readers. */
- (LCTermEnumerator *) termEnumerator
{
  LCMultiTermEnumerator *merged;
  merged = [[LCMultiTermEnumerator alloc] initWithReaders: subReaders
                                                   starts: starts
                                                     term: nil];
  return AUTORELEASE(merged);
}

/** Enumerator over the merged terms, set up with the given term. */
- (LCTermEnumerator *) termEnumeratorWithTerm: (LCTerm *) term
{
  LCMultiTermEnumerator *merged;
  merged = [[LCMultiTermEnumerator alloc] initWithReaders: subReaders
                                                   starts: starts
                                                     term: term];
  return AUTORELEASE(merged);
}

/** Sum of the document frequencies of the term over all sub-readers. */
- (long) documentFrequency: (LCTerm *) t
{
  long total = 0;     // accumulate in the return type; an int would truncate
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    {
      total += [[subReaders objectAtIndex: i] documentFrequency: t];
    }
  return total;
}

/** New, unpositioned term-documents object spanning all sub-readers. */
- (id) termDocuments
{
  return AUTORELEASE([[LCMultiTermDocuments alloc] initWithReaders: subReaders
                                                            starts: starts]);
}

/** New, unpositioned term-positions object spanning all sub-readers. */
- (id) termPositions
{
  return AUTORELEASE([[LCMultiTermPositions alloc] initWithReaders: subReaders
                                                            starts: starts]);
}

/** Commits every sub-reader. */
- (void) doCommit
{
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    [[subReaders objectAtIndex: i] commit];
}

/** Closes every sub-reader. */
- (void) doClose
{
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    [[subReaders objectAtIndex: i] close];
}

/**
 * Union (without duplicates) of the field names the sub-readers report
 * for the given option. The order of the returned names is unspecified.
 */
- (NSArray *) fieldNames: (LCFieldOption) fieldOption
{
  /* A set keeps the names unique; autoreleased so it is not leaked. */
  NSMutableSet *fieldSet = AUTORELEASE([[NSMutableSet alloc] init]);
  int count = [subReaders count];
  int i;
  for (i = 0; i < count; i++)
    {
      LCIndexReader *reader = [subReaders objectAtIndex: i];
      [fieldSet addObjectsFromArray: [reader fieldNames: fieldOption]];
    }
  return [fieldSet allObjects];
}
@end
@implementation LCMultiTermEnumerator

/**
 * Builds a merge queue holding one LCSegmentMergeInfo per reader that
 * still has a term to offer.
 * @param readers the sub-readers to merge
 * @param starts  starting document number of each sub-reader
 * @param t       term handed to each reader's -termEnumeratorWithTerm:,
 *                or nil to enumerate from each reader's -termEnumerator
 */
- (id) initWithReaders: (NSArray *) readers
                starts: (NSArray *) starts
                  term: (LCTerm *) t
{
  self = [super init];
  if (self == nil)
    return nil;

  queue = [(LCSegmentMergeQueue *)[LCSegmentMergeQueue alloc] initWithSize: [readers count]];

  int count = [readers count];
  int i;
  for (i = 0; i < count; i++)
    {
      LCIndexReader *reader = [readers objectAtIndex: i];
      LCTermEnumerator *termEnum;
      if (t != nil)
        termEnum = [reader termEnumeratorWithTerm: t];
      else
        termEnum = [reader termEnumerator];

      LCSegmentMergeInfo *smi;
      smi = [[LCSegmentMergeInfo alloc] initWithBase: [[starts objectAtIndex: i] intValue]
                                      termEnumerator: termEnum
                                              reader: reader];

      /* Without a start term the segment has to be advanced once; with
       * one, the segment takes part if its enumerator has a current term. */
      BOOL usable = (t == nil) ? [smi hasNextTerm] : ([termEnum term] != nil);
      if (usable)
        [queue put: smi];                                 // initialize queue
      else
        [smi close];
      RELEASE(smi);
    }

  if (t != nil && [queue size] > 0)
    {
      [self hasNextTerm];
    }
  return self;
}

/** Releases the merge queue and the copy of the current term. */
- (void) dealloc
{
  DESTROY(queue);
  DESTROY(term);
  [super dealloc];
}

/**
 * Advances to the smallest term in the queue, summing the document
 * frequency over every segment positioned on that term.
 * @return NO when the queue is empty (the current term is then nil)
 */
- (BOOL) hasNextTerm
{
  LCSegmentMergeInfo *top = (LCSegmentMergeInfo *)[queue top];
  if (top == nil)
    {
      DESTROY(term);                   // release the previous copy, not just drop it
      return NO;
    }

  /* LuceneKit: keep a private copy so it won't change along with the
   * queue. ASSIGNCOPY also releases the copy made by the previous call. */
  ASSIGNCOPY(term, [top term]);
  docFreq = 0;

  while (top != nil && [term compare: [top term]] == NSOrderedSame)
    {
      /* The queue may be the only owner of `top`; hold on to it while it
       * is out of the queue. */
      RETAIN(top);
      [queue pop];
      docFreq += [[top termEnumerator] documentFrequency];  // increment freq
      if ([top hasNextTerm])
        [queue put: top];                                   // restore queue
      else
        [top close];                                        // done with a segment
      RELEASE(top);
      top = (LCSegmentMergeInfo *)[queue top];
    }
  return YES;
}

/**
 * The current term, or nil when the enumeration is exhausted. Returned
 * retained/autoreleased so it outlives the next call to -hasNextTerm.
 */
- (LCTerm *) term
{
  return AUTORELEASE(RETAIN(term));
}

/** Document frequency of the current term summed over all segments. */
- (long) documentFrequency
{
  return docFreq;
}

/** Closes the underlying merge queue. */
- (void) close
{
  [queue close];
}
@end
/* Private helper of LCMultiTermDocuments, implemented in this file. */
@interface LCMultiTermDocuments (LCPrivate)
/* Returns the term-documents object for the reader at index i, sought to
 * the current term; returns nil when no term has been set. */
- (id ) termDocuments: (int) i;
@end
@implementation LCMultiTermDocuments

/** Base initializer: starts at the first reader with a zero document base. */
- (id) init
{
  self = [super init];
  if (self != nil)
    {
      base = 0;
      pointer = 0;
    }
  return self;
}

/**
 * @param r the sub-readers to iterate over, in order
 * @param s starting document number of each sub-reader
 */
- (id) initWithReaders: (NSArray *) r
                starts: (NSArray *) s
{
  self = [self init];
  if (self != nil)
    {
      ASSIGN(readers, r);
      ASSIGN(starts, s);
      readerTermDocs = [[NSMutableArray alloc] init];
    }
  return self;
}

- (void) dealloc
{
  DESTROY(readers);
  DESTROY(starts);
  DESTROY(readerTermDocs);
  DESTROY(current);
  DESTROY(term);              // retained by -seekTerm:
  [super dealloc];
}

/** Current document number, translated into the combined numbering. */
- (long) document
{
  return base + [current document];
}

/** Frequency of the term in the current document. */
- (long) frequency
{
  return [current frequency];
}

/** Remembers the term and rewinds to the first reader. */
- (void) seekTerm: (LCTerm *) t
{
  ASSIGN(term, t);
  base = 0;
  pointer = 0;
  DESTROY(current);
}

/** Seeks to the current term of the given enumerator. */
- (void) seekTermEnumerator: (LCTermEnumerator *) termEnum
{
  [self seekTerm: [termEnum term]];
}

/**
 * Advances to the next document, moving on to the following readers when
 * the current one is exhausted.
 * @return NO once every reader has been consumed
 */
- (BOOL) hasNextDocument
{
  /* Loop instead of recursing once per exhausted reader. */
  while (YES)
    {
      if (current != nil && [current hasNextDocument])
        return YES;

      if (pointer < [readers count])
        {
          base = [[starts objectAtIndex: pointer] intValue];
          /* Advance outside the macro so the increment happens exactly once. */
          id next = [self termDocuments: pointer];
          pointer++;
          ASSIGN(current, next);
        }
      else
        {
          return NO;
        }
    }
}

/**
 * Optimized implementation: lets the current reader fill docs/freqs and
 * then shifts the document numbers by this reader's base.
 * @return number of entries read, 0 when all readers are exhausted
 */
- (int) readDocuments: (NSMutableArray *) docs frequency: (NSMutableArray *) freqs size: (int) size
{
  while (YES)
    {
      while (current == nil)
        {
          if (pointer < [readers count])                   // try next segment
            {
              base = [[starts objectAtIndex: pointer] intValue];
              id next = [self termDocuments: pointer];
              pointer++;
              ASSIGN(current, next);
            }
          else
            {
              return 0;
            }
        }

      int end = [current readDocuments: docs frequency: freqs size: size];
      if (end == 0)
        {
          DESTROY(current);                                // none left in segment
          continue;
        }

      /* Got some: adjust the first `end` document numbers. */
      int i;
      for (i = 0; i < end; i++)
        {
          int shifted = [[docs objectAtIndex: i] intValue] + base;
          [docs replaceObjectAtIndex: i withObject: [NSNumber numberWithInt: shifted]];
        }
      return end;
    }
}

/** As yet unoptimized implementation: steps until a document >= target. */
- (BOOL) skipTo: (int) target
{
  do
    {
      if (![self hasNextDocument])
        return NO;
    }
  while (target > [self document]);
  return YES;
}

/**
 * Returns the term-documents object of reader i, sought to the current
 * term. Objects are created on first use and kept in readerTermDocs so
 * later seeks reuse them. Returns nil when no term has been set.
 */
- (id) termDocuments: (int) i
{
  if (term == nil)
    return nil;

  /* LuceneKit implementation */
  id result;
  if (i < (int)[readerTermDocs count])
    {
      /* Already created by an earlier pass; reuse it. Leaving `result`
       * unset here meant messaging an uninitialized pointer. */
      result = [readerTermDocs objectAtIndex: i];
    }
  else
    {
      result = [self termDocumentsWithReader: [readers objectAtIndex: i]];
      [readerTermDocs addObject: result];
    }
  [result seekTerm: term];
  return result;
}

/** Factory hook: subclasses return a different kind of term-documents object. */
- (id) termDocumentsWithReader: (LCIndexReader *) reader
{
  return [reader termDocuments];
}

/** Closes every per-reader object created so far. */
- (void) close
{
  int count = [readerTermDocs count];
  int i;
  for (i = 0; i < count; i++)
    {
      /* An NSArray cannot hold nil, so no nil check is needed. */
      [[readerTermDocs objectAtIndex: i] close];
    }
}
@end
@implementation LCMultiTermPositions

/** Uses the reader's term positions rather than plain term documents. */
- (id) termDocumentsWithReader: (LCIndexReader *) reader
{
  id positions = [reader termPositions];
  return positions;
}

/** Next position of the term within the current document, as reported by
 * the current per-reader object. */
- (int) nextPosition
{
  id active = current;
  return [active nextPosition];
}

/** Orders two term-position objects by their current document number. */
- (NSComparisonResult) compare: (id) o
{
  LCMultiTermPositions *other = (LCMultiTermPositions *) o;
  long mine = [self document];
  long theirs = [other document];

  if (mine < theirs)
    return NSOrderedAscending;
  if (mine == theirs)
    return NSOrderedSame;
  return NSOrderedDescending;
}
@end