Package org.apache.lucene.store

Examples of org.apache.lucene.store.FSDirectory$FSIndexInput$Descriptor


    {
      System.out.println(
                 "java org.apache.lucene.wordnet.SynExpand <index path> <query>");
    }

    FSDirectory directory = FSDirectory.open(new File(args[0]));
    IndexSearcher searcher = new IndexSearcher(directory, true);

    String query = args[1];
    String field = "contents";

    Query q = expand( query, searcher, new StandardAnalyzer(Version.LUCENE_CURRENT), field, 0.9f);
    System.out.println( "Query: " + q.toString( field));



    searcher.close();
    directory.close();
  }
View Full Code Here


    if (args.length != 2) {
      System.out.println(
                 "java org.apache.lucene.wordnet.SynLookup <index path> <word>");
    }

    FSDirectory directory = FSDirectory.open(new File(args[0]));
    IndexSearcher searcher = new IndexSearcher(directory, true);

    String word = args[1];
    Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));
    CountingCollector countingCollector = new CountingCollector();
    searcher.search(query, countingCollector);

    if (countingCollector.numHits == 0) {
      System.out.println("No synonyms found for " + word);
    } else {
      System.out.println("Synonyms found for \"" + word + "\":");
    }

    ScoreDoc[] hits = searcher.search(query, countingCollector.numHits).scoreDocs;
   
    for (int i = 0; i < hits.length; i++) {
      Document doc = searcher.doc(hits[i].doc);

      String[] values = doc.getValues(Syns2Index.F_SYN);

      for (int j = 0; j < values.length; j++) {
        System.out.println(values[j]);
      }
    }

    searcher.close();
    directory.close();
  }
View Full Code Here

    String tmpIODir = System.getProperty("tempDir");
    String userName = System.getProperty("user.name");
    String path = tmpIODir + File.separator + "lazyDir" + userName;
    File file = new File(path);
    _TestUtil.rmDir(file);
    FSDirectory tmpDir = FSDirectory.open(file);
    assertTrue(tmpDir != null);

    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.setUseCompoundFile(false);
    writer.addDocument(testDoc);
View Full Code Here

    private static void index(String indexDir, Map<String,List<String>> word2Nums, Map<String,List<String>> num2Words)
        throws Throwable
    {
        int row = 0;
        int mod = 1;
        FSDirectory dir = FSDirectory.open(new File(indexDir));
        try {

          // override the specific index if it already exists
          IndexWriter writer = new IndexWriter(dir, ana, true, IndexWriter.MaxFieldLength.LIMITED);
          writer.setUseCompoundFile(true); // why?
          Iterator<String> i1 = word2Nums.keySet().iterator();
          while (i1.hasNext()) // for each word
          {
              String g = i1.next();
              Document doc = new Document();

              int n = index(word2Nums, num2Words, g, doc);
              if (n > 0)
              {
          doc.add( new Field( F_WORD, g, Field.Store.YES, Field.Index.NOT_ANALYZED));
                  if ((++row % mod) == 0)
                  {
                      o.println("\trow=" + row + "/" + word2Nums.size() + " doc= " + doc);
                      mod *= 2;
                  }
                  writer.addDocument(doc);
              } // else degenerate
          }
          o.println( "Optimizing..");
          writer.optimize();
          writer.close();
        } finally {
          dir.close();
        }
    }
View Full Code Here

    // create it and force create mode
    if (indexDir.mkdirs() && !overwrite) {
      create = true;
    }

    FSDirectory dir = FSDirectory.open(indexDir);
    try {
      Searcher searcher = null;
      boolean checkLastModified = false;
      if (!create) {
        try {
          searcher = new IndexSearcher(dir, true);
          checkLastModified = true;
        } catch (IOException ioe) {
          log("IOException: " + ioe.getMessage());
          // Empty - ignore, which indicates to index all
          // documents
        }
      }

      log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE);

      IndexWriter writer =
        new IndexWriter(dir, analyzer, create, IndexWriter.MaxFieldLength.LIMITED);

      writer.setUseCompoundFile(useCompoundIndex);
      int totalFiles = 0;
      int totalIndexed = 0;
      int totalIgnored = 0;
      try {
        writer.setMergeFactor(mergeFactor);

        for (int i = 0; i < rcs.size(); i++) {
          ResourceCollection rc = rcs.elementAt(i);
          if (rc.isFilesystemOnly()) {
            Iterator resources = rc.iterator();
            while (resources.hasNext()) {
              Resource r = (Resource) resources.next();
              if (!r.isExists() || !(r instanceof FileResource)) {
                continue;
              }
             
              totalFiles++;

              File file = ((FileResource) r).getFile();
             
              if (!file.exists() || !file.canRead()) {
                throw new BuildException("File \"" +
                                         file.getAbsolutePath()
                                         + "\" does not exist or is not readable.");
              }

              boolean indexIt = true;

              if (checkLastModified) {
                Term pathTerm =
                  new Term("path", file.getPath());
                TermQuery query =
                  new TermQuery(pathTerm);
                ScoreDoc[] hits = searcher.search(query, null, 1).scoreDocs;

                // if document is found, compare the
                // indexed last modified time with the
                // current file
                // - don't index if up to date
                if (hits.length > 0) {
                  Document doc = searcher.doc(hits[0].doc);
                  String indexModified =
                    doc.get("modified").trim();
                  if (indexModified != null) {
                    long lastModified = 0;
                    try {
                      lastModified = DateTools.stringToTime(indexModified);
                    } catch (ParseException e) {
                      // if modified time is not parsable, skip
                    }
                    if (lastModified == file.lastModified()) {
                      // TODO: remove existing document
                      indexIt = false;
                    }
                  }
                }
              }

              if (indexIt) {
                try {
                  log("Indexing " + file.getPath(),
                      Project.MSG_VERBOSE);
                  Document doc =
                    handler.getDocument(file);

                  if (doc == null) {
                    totalIgnored++;
                  } else {
                    // Add the path of the file as a field named "path".  Use a Keyword field, so
                    // that the index stores the path, and so that the path is searchable
                    doc.add(new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    // Add the last modified date of the file a field named "modified".  Use a
                    // Keyword field, so that it's searchable, but so that no attempt is made
                    // to tokenize the field into words.
                    doc.add(new Field("modified", DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    writer.addDocument(doc);
                    totalIndexed++;
                  }
                } catch (DocumentHandlerException e) {
                  throw new BuildException(e);
                }
              }
            }
            // for j
          }
          // if (fs != null)
        }
        // for i

        writer.optimize();
      }
        //try
      finally {
        // always make sure everything gets closed,
        // no matter how we exit.
        writer.close();
        if (searcher != null) {
          searcher.close();
        }
      }

      Date end = new Date();

      log(totalIndexed + " out of " + totalFiles + " indexed (" +
          totalIgnored + " ignored) in " + (end.getTime() - start.getTime()) +
          " milliseconds");
    } finally {
      dir.close();
    }
  }
View Full Code Here

        }
    }

    public void testIndexWriterLockRelease() throws IOException {
        IndexWriter im;
        FSDirectory dir = FSDirectory.open(this.__test_dir);
        try {
            im = new IndexWriter(dir, new org.apache.lucene.analysis.standard.StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
        } catch (FileNotFoundException e) {
            try {
                im = new IndexWriter(dir, new org.apache.lucene.analysis.standard.StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
            } catch (FileNotFoundException e1) {
            }
        } finally {
          dir.close();
        }
    }
View Full Code Here

    _TestUtil.rmDir(dir);
    dir.mkdirs();
    File destDir = new File(tmpDir, "testfilesplitterdest");
    _TestUtil.rmDir(destDir);
    destDir.mkdirs();
    FSDirectory fsDir = FSDirectory.open(dir);
    IndexWriter iw = new IndexWriter(fsDir, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
    for (int x=0; x < 100; x++) {
      Document doc = TestIndexWriterReader.createDocument(x, "index", 5);
      iw.addDocument(doc);
    }
View Full Code Here

    infos.commit(fsDir);
  }

  public void split(File destDir, String[] segs) throws IOException {
    destDir.mkdirs();
    FSDirectory destFSDir = FSDirectory.open(destDir);
    SegmentInfos destInfos = new SegmentInfos();
    destInfos.counter = infos.counter;
    for (String n : segs) {
      SegmentCommitInfo infoPerCommit = getInfo(n);
      SegmentInfo info = infoPerCommit.info;
View Full Code Here

        indexCount = 0;

        // Create a new index file on disk, allowing Lucene to choose
        // the best FSDirectory implementation given the environment.
        FSDirectory index = FSDirectory.open(indexDir);

        // indexing by lower-casing & tokenizing on whitespace
        Analyzer indexAnalyzer = new WhitespaceLowerCaseAnalyzer();

        // create the object that will actually build the Lucene index
        indexWriter = new IndexWriter(index, new IndexWriterConfig(Version.LUCENE_4_9, indexAnalyzer));

        // let's see how long this takes...
        Date start = new Date();

        // if we were given an alternate names file, process it
        if (altNamesFile != null) {
            loadAlternateNames(altNamesFile);
        }

        // load GeoNames gazetteer into Lucene index
        String line;
        int count = 0;
        for (File gazetteer : gazetteerFiles) {
            LOG.info("Processing Gazetteer: {}", gazetteer.getAbsolutePath());
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(gazetteer), "UTF-8"));
            while ((line = reader.readLine()) != null) {
                try {
                    count += 1;
                    // print progress update to console
                    if (count % 100000 == 0 ) {
                        LOG.info("rowcount: " + count);
                    }
                    GeoName geoName = GeoName.parseFromGeoNamesRecord(line);
                    resolveAncestry(geoName);
                } catch (IOException e) {
                    LOG.info("Skipping... Error on line: {}", line);
                } catch (RuntimeException re) {
                    LOG.info("Skipping... Error on line: {}", line);
                }
            }
            reader.close();
        }

        // that wasn't so long, was it?
        Date stop = new Date();

        LOG.info("Unresolved GeoNames (Pre-resolution)");
        logUnresolved();

        resolveUnresolved();

        LOG.info("Unresolved GeoNames (Post-resolution)");
        logUnresolved();

        LOG.info("Indexing unresolved GeoNames.");
        for (Set<GeoName> geos : unresolvedMap.values()) {
            for (GeoName nm : geos) {
                indexGeoName(nm);
            }
        }

        LOG.info("[DONE]");
        LOG.info("{} geonames added to index. ({} records)", indexWriter.maxDoc(), indexCount);
        LOG.info("Merging indices... please wait.");

        indexWriter.close();
        index.close();

        LOG.info("[DONE]");

        DateFormat df = new SimpleDateFormat("HH:mm:ss");
        long elapsed_MILLIS = stop.getTime() - start.getTime();
View Full Code Here

  public static final int DEFAULTnumTerms = 100;
  public static int numTerms = DEFAULTnumTerms;
 
  public static void main(String[] args) throws Exception {
    IndexReader reader = null;
    FSDirectory dir = null;
    String field = null;
    boolean IncludeTermFreqs = false;
  
    if (args.length == 0 || args.length > 4) {
      usage();
View Full Code Here

TOP

Related Classes of org.apache.lucene.store.FSDirectory$FSIndexInput$Descriptor

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.