Support for noob_ FSCrawler on Docker compose #1843
Where exactly did you put the job settings?
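For context, FSCrawler looks for each job's settings in `~/.fscrawler/{job_name}/_settings.yaml`, and the Docker image reads them from `/root/.fscrawler` inside the container. For a job called `documents_search`, the expected layout is:

```
~/.fscrawler
└── documents_search
    └── _settings.yaml
```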
Here is the structure I used:
You need to change the `url` line. Also note that you might have to change the `name` setting; see the sketch below.
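A sketch of the presumed change, using the container-side path `/tmp/es` that appears in the compose file later in the thread (both values here are assumptions, not the original snippets):

```yaml
# _settings.yaml (presumed): fs.url must be a path that exists *inside*
# the container, not a Windows host path
fs:
  url: "/tmp/es"   # was: "C:\Users\shouari\Documents\Documentation_es\data"

# and the name should match the job name passed to the fscrawler command
name: "documents_search"
```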
I did the modifications above, but I still get the same error. This is the debug output:
Could you share the full logs and switch to trace mode?
How do I start trace mode?
Maybe try this:
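The exact snippet was not captured here; based on the `--trace` flag that FSCrawler's CLI accepts (and which shows up in the compose file below), the suggestion was presumably along these lines:

```yaml
# docker-compose.yml, fscrawler service: add --trace to the command
command: fscrawler documents_search --trace --restart --rest
```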
Here is the output of the trace command:
Could you share those again? Thanks.
Sure, here they are:
`_settings.yaml`:

```yaml
---
name: "documents_search"
fs:
  url: "C:\Users\shouari\Documents\Documentation_es\data"
  indexed_chars: 100%
  lang_detect: true
  continue_on_error: true
  ocr:
    language: "eng+fra"
    enabled: true
    pdf_strategy: "ocr_and_text"
elasticsearch:
  nodes:
    - url: "https://elasticsearch:9200"
  username: "elastic"
  password: "a123456"
  ssl_verification: false
rest:
  url: "http://fscrawler:8080"
```
And the `docker-compose.yml`:

```yaml
version: "2.2"

services:
  setup:
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - certs:/usr/share/elasticsearch/config/certs
    user: "0"
    command: >
      bash -c '
        if [ x${ELASTIC_PASSWORD} == x ]; then
          echo "Set the ELASTIC_PASSWORD environment variable in the .env file";
          exit 1;
        elif [ x${KIBANA_PASSWORD} == x ]; then
          echo "Set the KIBANA_PASSWORD environment variable in the .env file";
          exit 1;
        fi;
        if [ ! -f certs/ca.zip ]; then
          echo "Creating CA";
          bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
          unzip config/certs/ca.zip -d config/certs;
        fi;
        if [ ! -f certs/certs.zip ]; then
          echo "Creating certs";
          echo -ne \
          "instances:\n"\
          "  - name: elasticsearch\n"\
          "    dns:\n"\
          "      - elasticsearch\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          > config/certs/instances.yml;
          bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
          unzip config/certs/certs.zip -d config/certs;
        fi;
        echo "Setting file permissions"
        chown -R root:root config/certs;
        find . -type d -exec chmod 750 \{\} \;;
        find . -type f -exec chmod 640 \{\} \;;
        echo "Waiting for Elasticsearch availability";
        until curl -s --cacert config/certs/ca/ca.crt https://elasticsearch:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
        echo "Setting kibana_system password";
        until curl -s -X POST --cacert config/certs/ca/ca.crt -u elastic:${ELASTIC_PASSWORD} -H "Content-Type: application/json" https://elasticsearch:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done;
        echo "All done!";
      '
    healthcheck:
      test: ["CMD-SHELL", "[ -f config/certs/elasticsearch/elasticsearch.crt ]"]
      interval: 1s
      timeout: 5s
      retries: 120

  elasticsearch:
    depends_on:
      setup:
        condition: service_healthy
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - certs:/usr/share/elasticsearch/config/certs
      - esdata:/usr/share/elasticsearch/data
    ports:
      - ${ES_PORT}:9200
    environment:
      - node.name=elasticsearch
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=elasticsearch
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - bootstrap.memory_lock=true
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/elasticsearch/elasticsearch.key
      - xpack.security.http.ssl.certificate=certs/elasticsearch/elasticsearch.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.http.ssl.verification_mode=certificate
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/elasticsearch/elasticsearch.key
      - xpack.security.transport.ssl.certificate=certs/elasticsearch/elasticsearch.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
      - xpack.license.self_generated.type=${LICENSE}
    mem_limit: ${MEM_LIMIT}
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120

  kibana:
    depends_on:
      elasticsearch:
        condition: service_healthy
    image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
    volumes:
      - certs:/usr/share/kibana/config/certs
      - kibanadata:/usr/share/kibana/data
    ports:
      - ${KIBANA_PORT}:5601
    environment:
      - SERVERNAME=kibana
      - ELASTICSEARCH_HOSTS=https://elasticsearch:9200
      - ELASTICSEARCH_USERNAME=kibana_system
      - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD}
      - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
      - ENTERPRISESEARCH_HOST=http://localhost:${ENTERPRISE_SEARCH_PORT}
    mem_limit: ${MEM_LIMIT}
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s -I http://localhost:5601 | grep -q 'HTTP/1.1 302 Found'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120

  # FSCrawler
  fscrawler:
    image: dadoonet/fscrawler:$FSCRAWLER_VERSION
    container_name: fscrawler
    restart: always
    volumes:
      - ../../test-documents/src/main/resources/documents/:/tmp/es:ro
      - ${PWD}/config:/root/.fscrawler
      - ${PWD}/logs:/usr/share/fscrawler/logs
      - ${PWD}/external:/usr/share/fscrawler/external
    depends_on:
      elasticsearch:
        condition: service_healthy
    ports:
      - ${FSCRAWLER_PORT}:8080
    command: fscrawler documents_search --trace --restart --rest

volumes:
  certs:
    driver: local
  # enterprisesearchdata:
  #   driver: local
  esdata:
    driver: local
  kibanadata:
    driver: local
```
Try this:

```yaml
---
name: "documents_search"
fs:
  url: "/tmp/es"
  indexed_chars: 100%
  lang_detect: true
  continue_on_error: true
  ocr:
    language: "eng+fra"
    enabled: true
    pdf_strategy: "ocr_and_text"
elasticsearch:
  nodes:
    - url: "https://elasticsearch:9200"
  username: "elastic"
  password: "a123456"
  ssl_verification: false
rest:
  url: "http://fscrawler:8080"
```

And change this volume to mount your document folder instead:

```yaml
- ../../test-documents/src/main/resources/documents/:/tmp/es:ro
```

If it does not work, please inspect your container and check that the folder is actually there.
I still face the same issue. I'll have a look at the folder; if it does not exist, what might be the cause according to you?
While running FSCrawler via Docker Compose, I face this error:

```
2024-03-25 17:47:19 21:47:19,717 ERROR [f.p.e.c.f.c.FsCrawlerCli] job [doc_idx] does not exist. Exiting as we are in silent mode or no input available.
```

Here is `_settings`, and here is the fscrawler section of the `docker-compose` file. Can you please help with this?
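The error means FSCrawler was started with the job name `doc_idx` and found no settings for it: the job name given on the command line must match the folder name under the mounted config directory. A sketch of a consistent setup, reusing the names from this thread:

```
# Host side (mounted to /root/.fscrawler in the container):
config/
└── doc_idx/
    └── _settings.yaml

# The fscrawler command must use the same job name:
# fscrawler doc_idx --restart --rest
```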