Package org.carrot2.core

Examples of org.carrot2.core.Document$DocumentToId


        .getLexicalData(LanguageCode.MALTESE);

    for (String word : wordsToCheck.split(",")) {
      if (!lexicalData.isCommonWord(new MutableCharArray(word))
          && !lexicalData.isStopLabel(word)) {
        clusters.add(new Cluster(word));
      }
    }
  }
View Full Code Here


  @Test
  public void testSimple() throws Exception {
    //<start id="crt2.simple"/>
    //... setup some documents elsewhere
    final Controller controller =
            ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
    documents = new ArrayList<Document>();
    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
            LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
    displayResults(result);//<co id="crt2.print"/>

    /*
 
View Full Code Here

    //... setup some documents elsewhere
    final Controller controller =
            ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
    documents = new ArrayList<Document>();
    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
View Full Code Here

          if (highlt != null && highlt.length == 1) {
            snippet = highlt[0];
          }
        }
      }
      Document carrotDocument = new Document(getValue(sdoc, titleField),
              snippet, (String)sdoc.getFieldValue(urlField));
      carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
      result.add(carrotDocument);
    }

    return result;
  }
View Full Code Here

        {
            final List entries = feed.getEntries();
            for (Iterator it = entries.iterator(); it.hasNext();)
            {
                final SyndEntry entry = (SyndEntry) it.next();
                final Document document = new Document();

                document.setField(Document.TITLE, clean(entry.getTitle()));
                document.setField(Document.SUMMARY, clean(entry.getDescription()
                    .getValue()));
                document.setField(Document.CONTENT_URL, entry.getLink());

                response.results.add(document);
            }
        }
View Full Code Here

        final List<Document> outputDocuments = Lists
            .newArrayListWithCapacity(inputDocuments.size());
       
        for (Document document : inputDocuments)
        {
            final Document clonedDocument = document.clone();
            for (String fieldName : fields)
            {
                highlightQueryTerms(clonedDocument, fieldName, queryPattern);
            }
            outputDocuments.add(clonedDocument);
View Full Code Here

        if (etools.documents != null)
        {
            for (Document etoolsDocument : etools.documents)
            {
                final Document matchingGoogleDocument = googleDocumentsByUrl
                    .get(etoolsDocument.getField(Document.CONTENT_URL));
                if (matchingGoogleDocument != null)
                {
                    final List<String> sources = etoolsDocument
                        .getField(Document.SOURCES);
                    if (!sources.contains("Google"))
                    {
                        sources.add("Google");
                    }
                    matchingGoogleDocument.setField(Document.SOURCES, sources);
                }
                else
                {
                    response.results.add(etoolsDocument);
                }
View Full Code Here

         * document separately.
         */
        final List<Document> documents = Lists.newArrayList();
        for (Document document : SampleDocumentData.DOCUMENTS_DATA_MINING)
        {
            documents.add(new Document(document.getTitle(), document.getSummary(),
                document.getContentUrl(), LanguageCode.ENGLISH));
        }

        final Map<String, Object> attributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(attributes)
View Full Code Here

            /* Prepare Carrot2 documents */
            final ArrayList<Document> documents = new ArrayList<Document>();
            for (String [] row : data)
            {
                documents.add(new Document(row[1], row[2], row[0]));
            }

            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();

View Full Code Here

        final List<Cluster> clusters = Lists.newArrayList();

        final ContaminationMetric metric = new ContaminationMetric();
        metric.documents = documents;

        final Document d1 = new Document();
        final Document d2 = documentWithPartitions("test");
        documents.add(d1);

        final Cluster c1 = new Cluster("test", d1);
        clusters.add(c1);
View Full Code Here

TOP

Related Classes of org.carrot2.core.Document$DocumentToId

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.